Fix CI report pipeline: update tmap v4 syntax, add continuous color scales, fix formatting

- Updated all CI maps to use tm_scale_continuous() for proper tmap v4 compatibility
- Added fixed color scale limits (1-8 for CI, -3 to +3 for differences) for consistent field comparison
- Fixed YAML header formatting issues in CI_report_dashboard_planet.Rmd
- Positioned RGB map before CI overview map as requested
- Removed all obsolete use_breaks parameter references
- Enhanced error handling and logging throughout the pipeline
- Added new experimental analysis scripts and improvements to mosaic creation
This commit is contained in:
Timon 2025-06-19 20:37:20 +02:00
parent bb2a599075
commit 6efcc8cfec
36 changed files with 7601 additions and 3273 deletions

513
.Rhistory
View file

@ -1 +1,512 @@
# NOTE(review): this section comes from console history (.Rhistory). The stray
# "})" on the next line is the tail of a truncated earlier command and is not
# syntactically complete on its own.
install.packages("xfun") })
# Verify farm_health_data exists and has content
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")
  # Create minimal fallback data
  # tryCatch() evaluates this block in the calling (global) environment, so the
  # plain `<-` below does create the global fallback object.
  tryCatch({
    # Get fields from boundaries
    fields <- unique(AllPivots0$field)
    # Create basic data frame with just field names; all metrics start as
    # NA/"Unknown" so downstream rendering degrades gracefully.
    farm_health_data <- data.frame(
      field = fields,
      mean_ci = rep(NA, length(fields)),
      ci_change = rep(NA, length(fields)),
      ci_uniformity = rep(NA, length(fields)),
      status = rep("Unknown", length(fields)),
      anomaly_type = rep("Unknown", length(fields)),
      priority_level = rep(5, length(fields)), # Low priority
      age_weeks = rep(NA, length(fields)),
      harvest_readiness = rep("Unknown", length(fields)),
      stringsAsFactors = FALSE
    )
    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # `<<-` is required here: inside the handler function a plain `<-` would
    # only bind a variable local to the handler, and the zero-row fallback
    # would be lost when the handler returns.
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
# Calculate farm health summary metrics.
# On success, farm_health_data is replaced with the generated summary; on
# failure a zero-row data frame with the expected columns is installed so that
# downstream chunks relying on its schema do not crash.
tryCatch({
  # Generate farm health summary data
  farm_health_data <- generate_farm_health_summary(
    field_boundaries = AllPivots0,
    ci_current = CI,
    ci_previous = CI_m1,
    harvesting_data = harvesting_data
  )
  # Log the summary data
  safe_log(paste("Generated farm health summary with", nrow(farm_health_data), "fields"))
}, error = function(e) {
  safe_log(paste("Error in farm health calculation:", e$message), "ERROR")
  # Create empty dataframe if calculation failed.
  # BUGFIX: this must use `<<-`. The original used `<-`, which only bound a
  # variable local to this handler function, so the empty fallback was
  # silently discarded (the equivalent fallback handlers elsewhere in this
  # script already use `<<-` for exactly this reason).
  farm_health_data <<- data.frame(
    field = character(),
    mean_ci = numeric(),
    ci_change = numeric(),
    ci_uniformity = numeric(),
    status = character(),
    anomaly_type = character(),
    priority_level = numeric(),
    age_weeks = numeric(),
    harvest_readiness = character(),
    stringsAsFactors = FALSE
  )
})
# Render the velocity and acceleration indicators
tryCatch({
  # Create and display the indicators using the imported utility function
  # (defined in executive_report_utils.R).
  velocity_plot <- create_velocity_acceleration_indicator(
    health_data = farm_health_data,
    ci_current = CI,
    ci_prev1 = CI_m1,
    ci_prev2 = CI_m2,
    ci_prev3 = CI_m3,
    field_boundaries = AllPivots0
  )
  # Print the visualization
  print(velocity_plot)
  # Create a table of fields with significant velocity changes
  field_ci_metrics <- list()
  # Process each field to get metrics
  fields <- unique(AllPivots0$field)
  for (field_name in fields) {
    tryCatch({
      # Get field boundary
      field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
      # `next` escapes the tryCatch and advances the loop — this works because
      # the enclosing for-loop frame is still on the call stack.
      if (nrow(field_shape) == 0) next
      # Extract CI values (terra::extract returns a data frame with an ID
      # column plus one column per raster layer)
      ci_curr_values <- terra::extract(CI, field_shape)
      ci_prev1_values <- terra::extract(CI_m1, field_shape)
      # Calculate metrics. The `$CI` access assumes the raster layer is named
      # "CI" — TODO confirm against the mosaic files.
      mean_ci_curr <- mean(ci_curr_values$CI, na.rm = TRUE)
      mean_ci_prev1 <- mean(ci_prev1_values$CI, na.rm = TRUE)
      # Week-over-week change in mean CI ("velocity")
      velocity <- mean_ci_curr - mean_ci_prev1
      # Store in list
      field_ci_metrics[[field_name]] <- list(
        field = field_name,
        ci_current = mean_ci_curr,
        ci_prev1 = mean_ci_prev1,
        velocity = velocity
      )
    }, error = function(e) {
      safe_log(paste("Error processing field", field_name, "for velocity table:", e$message), "WARNING")
    })
  }
  # Convert list to data frame.
  # NOTE(review): when no field produced metrics, do.call(rbind, ...) yields
  # NULL and the dplyr::filter below errors; that error is caught by the outer
  # handler, which then suppresses the whole section — confirm this is the
  # intended degradation.
  velocity_df <- do.call(rbind, lapply(field_ci_metrics, function(x) {
    data.frame(
      field = x$field,
      ci_current = round(x$ci_current, 2),
      ci_prev1 = round(x$ci_prev1, 2),
      velocity = round(x$velocity, 2),
      direction = ifelse(x$velocity >= 0, "Improving", "Declining")
    )
  }))
  # Select top 5 positive and top 5 negative velocity fields
  top_positive <- velocity_df %>%
    dplyr::filter(velocity > 0) %>%
    dplyr::arrange(desc(velocity)) %>%
    dplyr::slice_head(n = 5)
  top_negative <- velocity_df %>%
    dplyr::filter(velocity < 0) %>%
    dplyr::arrange(velocity) %>%
    dplyr::slice_head(n = 5)
  # Display the tables if we have data (raw HTML headings require
  # results='asis' in the hosting knitr chunk — presumably set there)
  if (nrow(top_positive) > 0) {
    cat("<h4>Fields with Fastest Improvement</h4>")
    knitr::kable(top_positive %>%
                   dplyr::select(Field = field,
                                 `Current CI` = ci_current,
                                 `Previous CI` = ci_prev1,
                                 `Weekly Change` = velocity))
  }
  if (nrow(top_negative) > 0) {
    cat("<h4>Fields with Fastest Decline</h4>")
    knitr::kable(top_negative %>%
                   dplyr::select(Field = field,
                                 `Current CI` = ci_current,
                                 `Previous CI` = ci_prev1,
                                 `Weekly Change` = velocity))
  }
}, error = function(e) {
  safe_log(paste("Error rendering velocity visualization:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating velocity visualization.</div>")
})
# Build and render the anomaly timeline; on failure, log the error and emit an
# HTML alert so the report still renders.
tryCatch(
  {
    # create_anomaly_timeline() is imported from executive_report_utils.R
    timeline_plot <- create_anomaly_timeline(
      field_boundaries = AllPivots0,
      ci_data = CI_quadrant,
      days_to_include = 90 # restrict the view to the most recent 90 days
    )
    print(timeline_plot)
  },
  error = function(e) {
    safe_log(paste("Error generating anomaly timeline:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating anomaly timeline visualization.</div>")
  }
)
# NOTE(review): exact duplicate of the fallback block earlier in this history —
# expected, since .Rhistory records every executed command. The exists()/nrow()
# guard makes re-running it idempotent.
# Verify farm_health_data exists and has content
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")
  # Create minimal fallback data
  tryCatch({
    # Get fields from boundaries
    fields <- unique(AllPivots0$field)
    # Create basic data frame with just field names
    farm_health_data <- data.frame(
      field = fields,
      mean_ci = rep(NA, length(fields)),
      ci_change = rep(NA, length(fields)),
      ci_uniformity = rep(NA, length(fields)),
      status = rep("Unknown", length(fields)),
      anomaly_type = rep("Unknown", length(fields)),
      priority_level = rep(5, length(fields)), # Low priority
      age_weeks = rep(NA, length(fields)),
      harvest_readiness = rep("Unknown", length(fields)),
      stringsAsFactors = FALSE
    )
    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # `<<-` writes the empty fallback to the global environment; a plain `<-`
    # would be discarded with the handler's frame.
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
# ADVANCED ANALYTICS FUNCTIONS
# Note: These functions are now imported from executive_report_utils.R
# The utility file contains functions for velocity/acceleration indicators,
# anomaly timeline creation, age cohort mapping, and cohort performance charts
safe_log("Using analytics functions from executive_report_utils.R")
# Chunk 1: setup_parameters
# Set up basic report parameters from input values.
# `params` is supplied by the hosting R Markdown document's YAML header.
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
# NOTE(review): the commit message says use_breaks was removed from the report;
# this history entry predates that cleanup.
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving  (Dutch: "Activate the renv environment")
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# # (Dutch: "Optional: restore the environment if needed; comment this out
# # if you don't normally want to run it")
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions.
# Two-level fallback: first try the working directory, then the r_app/
# project path; only if both fail does the script stop. Note the inner
# handler's `e` shadows the outer one, so the stop() message reports the
# second (here::here) failure.
tryCatch({
  source("report_utils.R")
}, error = function(e) {
  message(paste("Error loading report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "report_utils.R"))
  }, error = function(e) {
    stop("Could not load report_utils.R from either location: ", e$message)
  })
})
# Load executive report utilities (same two-level fallback as above)
tryCatch({
  source("executive_report_utils.R")
}, error = function(e) {
  message(paste("Error loading executive_report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "executive_report_utils.R"))
  }, error = function(e) {
    stop("Could not load executive_report_utils.R from either location: ", e$message)
  })
})
# NOTE(review): second replay of the setup_parameters + load_libraries chunks —
# this is console history, so each re-run of the Rmd setup is recorded again.
# Re-executing it is harmless: assignments and library() calls are idempotent.
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions (two-level path fallback, as in the first replay)
tryCatch({
  source("report_utils.R")
}, error = function(e) {
  message(paste("Error loading report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "report_utils.R"))
  }, error = function(e) {
    stop("Could not load report_utils.R from either location: ", e$message)
  })
})
# Load executive report utilities
tryCatch({
  source("executive_report_utils.R")
}, error = function(e) {
  message(paste("Error loading executive_report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "executive_report_utils.R"))
  }, error = function(e) {
    stop("Could not load executive_report_utils.R from either location: ", e$message)
  })
})
# NOTE(review): third and fourth replays of the setup chunks. Here the
# tryCatch wrappers around source() have been commented out, so a failing
# source() call now stops the session immediately with no fallback path —
# evidently the utility-file location was being debugged interactively.
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
# tryCatch({
# source("report_utils.R")
# }, error = function(e) {
# message(paste("Error loading report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
# Only the project-path variant is executed (no fallback, no error handling):
source(here::here("r_app", "report_utils.R"))
# }, error = function(e) {
# stop("Could not load report_utils.R from either location: ", e$message)
# })
# })
# Load executive report utilities
# tryCatch({
# source("executive_report_utils.R")
# }, error = function(e) {
# message(paste("Error loading executive_report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
source(here::here("r_app", "executive_report_utils.R"))
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
# tryCatch({
# source("report_utils.R")
# }, error = function(e) {
# message(paste("Error loading report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
source(here::here("r_app", "report_utils.R"))
# }, error = function(e) {
# stop("Could not load report_utils.R from either location: ", e$message)
# })
# })
# Load executive report utilities
# tryCatch({
# source("executive_report_utils.R")
# }, error = function(e) {
# message(paste("Error loading executive_report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
# NOTE(review): this replay sources from the exec_dashboard/ subdirectory —
# a different location than the earlier replays; confirm which path is the
# one the committed Rmd actually uses.
source(here::here("r_app", "exec_dashboard", "executive_report_utils.R"))
# }, error = function(e) {
# stop("Could not load executive_report_utils.R from either location: ", e$message)
# })
# })
# Chunk 3: initialize_project_config
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
# (the tryCatch wrapper is commented out here, so a failure stops immediately)
# tryCatch({
source(here::here("r_app", "parameters_project.R"))
# }, error = function(e) {
# stop("Error loading parameters_project.R: ", e$message)
# })
# Log initial configuration
safe_log("Starting the R Markdown script")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
# Set locale for consistent date formatting (English weekday names are
# required for the days_of_week matching below)
Sys.setlocale("LC_TIME", "C")
# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)
# Calculate week days
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Calculate initial week number
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))
# Calculate previous dates for comparisons (weekly cadence: -7/-14/-21 days)
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
# Log the weekday calculations for debugging
safe_log(paste("Report date weekday:", report_date_as_week_day))
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
safe_log(paste("Mail day:", mail_day_as_character))
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))
# Adjust week calculation based on mail day: if the report date already fell
# past this week's mail day, shift the comparison window one week forward
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
  safe_log("Adjusting weeks because of mail day")
  week <- lubridate::week(today) + 1
  today_minus_1 <- as.character(lubridate::ymd(today))
  today_minus_2 <- as.character(lubridate::ymd(today) - 7)
  today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}
# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())
# Calculate week numbers for previous weeks.
# NOTE: this must stay BEFORE the sprintf() below — once `week` is formatted
# it becomes a character and the arithmetic would fail. The minus-week values
# intentionally remain unpadded integers.
week_minus_1 <- week - 1
week_minus_2 <- week - 2
week_minus_3 <- week - 3
# Format current week with leading zeros
week <- sprintf("%02d", week)
# Get years for each date
year <- lubridate::year(today)
year_1 <- lubridate::year(today_minus_1)
year_2 <- lubridate::year(today_minus_2)
year_3 <- lubridate::year(today_minus_3)
# Load CI index data with error handling
tryCatch({
  CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
  safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
  stop("Error loading CI quadrant data: ", e$message)
})
# Get file paths for different weeks using the utility function
tryCatch({
  path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
  path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
  path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
  path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
  # Log the calculated paths
  safe_log("Required mosaic paths:")
  safe_log(paste("Path to current week:", path_to_week_current))
  safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
  safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
  safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
  # Validate that files exist — missing files only warn here; the rast()
  # calls below will still be attempted and error into this tryCatch
  if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
  if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
  if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
  if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
  # Load raster data with terra functions; `$CI` selects the layer named "CI"
  # from each multi-layer mosaic
  CI <- terra::rast(path_to_week_current)$CI
  CI_m1 <- terra::rast(path_to_week_minus_1)$CI
  CI_m2 <- terra::rast(path_to_week_minus_2)$CI
  CI_m3 <- terra::rast(path_to_week_minus_3)$CI
}, error = function(e) {
  stop("Error loading raster data: ", e$message)
})

3
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,3 @@
{
"python.REPL.enableREPLSmartSend": false
}

Binary file not shown.

BIN
figure/sub_chunk_8433-1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,319 @@
import os
import argparse
import numpy as np
from pathlib import Path
from osgeo import gdal
import rasterio as rio
from rasterio.enums import Resampling
from rasterio.warp import reproject
from osgeo import osr
# Attempt to import OmniCloudMask and set a flag.
# OCM is treated as an optional dependency: when it is missing the module
# still imports, and run_ocm_on_image() / the __main__ guard check HAS_OCM
# and degrade gracefully instead of crashing.
try:
    from omnicloudmask import predict_from_array, load_multiband
    HAS_OCM = True
except ImportError:
    HAS_OCM = False
def calculate_utm_zone_and_hemisphere(longitude, latitude):
    """
    Calculate the UTM zone and hemisphere based on longitude and latitude.

    Parameters
    ----------
    longitude : float
        Longitude in decimal degrees. Wrapped into [0, 360) internally, so
        longitude == 180.0 maps to zone 1 (the naive ``int((lon+180)/6)+1``
        formula would produce the invalid zone 61) and out-of-range inputs
        are handled consistently.
    latitude : float
        Latitude in decimal degrees.

    Returns
    -------
    tuple (int, bool)
        (utm_zone, is_southern): utm_zone in 1..60, and True when the
        latitude lies south of the equator.
    """
    # Python's % always returns a non-negative result for a positive modulus,
    # so the wrapped value is in [0, 360) and truncation equals floor here.
    utm_zone = int(((longitude + 180.0) % 360.0) / 6.0) + 1
    is_southern = latitude < 0
    return utm_zone, is_southern
def reproject_to_projected_crs(input_path, output_path):
    """
    Reprojects a raster to a projected coordinate system (e.g., UTM).

    The target UTM zone and hemisphere are derived from the geographic
    coordinates of the raster's center pixel.

    Parameters
    ----------
    input_path : str or Path
        Source raster readable by GDAL.
    output_path : str or Path
        Destination GeoTIFF path.

    Returns
    -------
    The output path on success; raises ValueError if the input cannot be
    opened.
    """
    input_ds = gdal.Open(str(input_path))
    if not input_ds:
        raise ValueError(f"Failed to open input raster: {input_path}")
    # Get the source spatial reference
    source_srs = osr.SpatialReference()
    source_srs.ImportFromWkt(input_ds.GetProjection())
    # Get the geographic coordinates of the image's center
    geo_transform = input_ds.GetGeoTransform()
    width = input_ds.RasterXSize
    height = input_ds.RasterYSize
    center_x = geo_transform[0] + (width / 2) * geo_transform[1]
    center_y = geo_transform[3] + (height / 2) * geo_transform[5]
    # Calculate the UTM zone and hemisphere dynamically
    utm_zone, is_southern = calculate_utm_zone_and_hemisphere(center_x, center_y)
    # Define the target spatial reference
    target_srs = osr.SpatialReference()
    target_srs.SetWellKnownGeogCS("WGS84")
    # BUGFIX: SetUTM's second argument is a *north* flag (bNorth in the GDAL
    # API). The original passed is_southern directly, which selected the
    # northern UTM variant for southern-hemisphere scenes and vice versa.
    target_srs.SetUTM(utm_zone, int(not is_southern))
    # Create the warp options
    warp_options = gdal.WarpOptions(
        dstSRS=target_srs.ExportToWkt(),
        format="GTiff"
    )
    # Perform the reprojection
    gdal.Warp(str(output_path), input_ds, options=warp_options)
    input_ds = None  # Close the dataset
    print(f"Reprojected raster saved to: {output_path}")
    return output_path
def resample_image(input_path, output_path, resolution=(10, 10), resample_alg="bilinear"):
    """
    Resamples a raster to a specified resolution using gdal.Translate.

    The input is first reprojected to a projected CRS (UTM) so the requested
    resolution is expressed in meters; the intermediate "*_reprojected.tif"
    file is written next to the output and left in place.
    """
    print(f"Resampling {input_path} to {resolution}m resolution -> {output_path}")

    # Step 1: reproject so the pixel sizes below are in meters, not degrees.
    reprojected_path = str(
        Path(output_path).with_name(f"{Path(output_path).stem}_reprojected.tif")
    )
    reproject_to_projected_crs(input_path, reprojected_path)

    # Step 2: resample the reprojected raster to the target pixel size.
    src_ds = gdal.Open(reprojected_path)
    if not src_ds:
        raise ValueError(f"Failed to open reprojected raster: {reprojected_path}")
    translated = gdal.Translate(
        str(output_path),
        src_ds,
        xRes=resolution[0],
        yRes=resolution[1],
        resampleAlg=resample_alg,
    )
    src_ds = None  # release the GDAL dataset handle
    if translated is None:
        raise ValueError(f"Failed to resample image to {output_path}")

    print(f"Successfully resampled image saved to: {output_path}")
    return output_path
def run_ocm_on_image(image_path_10m, ocm_output_dir, save_mask=True):
    """
    Processes a 10m resolution image with OmniCloudMask.
    Adapted from process_with_ocm in the notebook.

    Returns (mask_path, profile) on success, or (None, None) when OCM is
    unavailable, the image is too small, or prediction fails.

    NOTE(review): when save_mask=False the mask path is still returned even
    though no file was written — confirm callers expect that.
    """
    if not HAS_OCM:
        print("OmniCloudMask not available. Please install with: pip install omnicloudmask")
        return None, None
    image_path_10m = Path(image_path_10m)
    ocm_output_dir = Path(ocm_output_dir)
    ocm_output_dir.mkdir(exist_ok=True, parents=True)
    mask_10m_path = ocm_output_dir / f"{image_path_10m.stem}_ocm_mask_10m.tif"
    try:
        # Open the image only to check dimensions; the handle is released
        # before OCM loads the data itself.
        with rio.open(image_path_10m) as src:
            width, height = src.width, src.height
        # Check if the image is too small for OmniCloudMask
        if width < 50 or height < 50:
            print(f"Warning: Image {image_path_10m} is too small for OmniCloudMask (width: {width}, height: {height}). Skipping.")
            return None, None
        # PlanetScope 4-band images are typically [B,G,R,NIR]
        # OCM expects [R,G,NIR] for its default model.
        # Band numbers for load_multiband are 1-based.
        # If original is B(1),G(2),R(3),NIR(4), then R=3, G=2, NIR=4
        band_order = [3, 2, 4]
        print(f"Loading 10m image for OCM: {image_path_10m}")
        # resample_res=10 explicitly pins the working resolution for OCM;
        # image_path_10m is already 10m so this should be a no-op resample.
        rgn_data, profile = load_multiband(
            input_path=str(image_path_10m),
            resample_res=10, # Explicitly set target resolution for OCM
            band_order=band_order
        )
        print("Applying OmniCloudMask...")
        prediction = predict_from_array(rgn_data)
        if save_mask:
            # Single-band uint8 mask carrying the OCM class codes
            profile.update(count=1, dtype='uint8')
            with rio.open(mask_10m_path, 'w', **profile) as dst:
                dst.write(prediction.astype('uint8'), 1)
            print(f"Saved 10m OCM mask to: {mask_10m_path}")
        # Summary (optional, can be removed for cleaner script output)
        # OCM class codes: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow
        n_total = prediction.size
        n_clear = np.sum(prediction == 0)
        n_thick = np.sum(prediction == 1)
        n_thin = np.sum(prediction == 2)
        n_shadow = np.sum(prediction == 3)
        print(f"  OCM: Clear: {100*n_clear/n_total:.1f}%, Thick: {100*n_thick/n_total:.1f}%, Thin: {100*n_thin/n_total:.1f}%, Shadow: {100*n_shadow/n_total:.1f}%")
        return str(mask_10m_path), profile
    except Exception as e:
        print(f"Error processing 10m image with OmniCloudMask: {str(e)}")
        return None, None
def upsample_mask_to_3m(mask_10m_path, target_3m_image_path, output_3m_mask_path):
    """
    Upsamples a 10m OCM mask to match the 3m target image.
    Adapted from upsample_mask_to_highres in the notebook.

    Nearest-neighbor resampling is used so the categorical class codes are
    preserved exactly.
    """
    print(f"Upsampling 10m mask {mask_10m_path} to 3m, referencing {target_3m_image_path}")
    with rio.open(mask_10m_path) as src_mask, rio.open(target_3m_image_path) as src_img_3m:
        coarse_mask = src_mask.read(1)
        fine_shape = (src_img_3m.height, src_img_3m.width)

        # Allocate the destination grid on the 3m raster's footprint, keeping
        # the mask's dtype so class codes round-trip unchanged.
        fine_mask = np.zeros(fine_shape, dtype=coarse_mask.dtype)
        reproject(
            source=coarse_mask,
            destination=fine_mask,
            src_transform=src_mask.transform,
            src_crs=src_mask.crs,
            dst_transform=src_img_3m.transform,
            dst_crs=src_img_3m.crs,
            resampling=Resampling.nearest
        )

        # Write the upsampled mask with the 3m image's georeferencing.
        out_profile = src_img_3m.profile.copy()
        out_profile.update({
            'count': 1,
            'dtype': fine_mask.dtype
        })
        with rio.open(output_3m_mask_path, 'w', **out_profile) as dst:
            dst.write(fine_mask, 1)

    print(f"Upsampled 3m OCM mask saved to: {output_3m_mask_path}")
    return str(output_3m_mask_path)
def apply_3m_mask_to_3m_image(image_3m_path, mask_3m_path, final_masked_output_path):
    """
    Applies an upsampled 3m OCM mask to the original 3m image.
    Adapted from apply_upsampled_mask_to_highres in the notebook.

    OCM classes: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow. All
    non-clear pixels (class > 0) are set to the image's nodata value.

    Returns the output path as a string, or None on failure.
    """
    print(f"Applying 3m mask {mask_3m_path} to 3m image {image_3m_path}")
    image_3m_path = Path(image_3m_path)
    mask_3m_path = Path(mask_3m_path)
    final_masked_output_path = Path(final_masked_output_path)
    final_masked_output_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        with rio.open(image_3m_path) as src_img_3m, rio.open(mask_3m_path) as src_mask_3m:
            img_data_3m = src_img_3m.read()
            img_profile_3m = src_img_3m.profile.copy()
            mask_data_3m = src_mask_3m.read(1)
        if img_data_3m.shape[1:] != mask_data_3m.shape:
            print(f"Warning: 3m image shape {img_data_3m.shape[1:]} and 3m mask shape {mask_data_3m.shape} do not match.")
            # This should ideally not happen if upsampling was correct.
        # OCM: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow
        # We want to mask out (set to nodata) pixels where OCM is > 0
        binary_mask = np.ones_like(mask_data_3m, dtype=np.uint8)
        binary_mask[mask_data_3m > 0] = 0  # 0 for cloud/shadow, 1 for clear
        masked_img_data_3m = img_data_3m.copy()
        # BUGFIX: dict.get('nodata', 0) returns None when the profile stores
        # an explicit 'nodata': None (the usual rasterio case for rasters
        # without a nodata value), and writing None into a numpy array then
        # fails. Fall back to 0 only when the value is absent or None.
        nodata_val = img_profile_3m.get('nodata')
        if nodata_val is None:
            nodata_val = 0
        for i in range(img_profile_3m['count']):
            masked_img_data_3m[i][binary_mask == 0] = nodata_val
        # Ensure dtype of profile matches data to be written
        # If original image was float, but nodata is int (0), rasterio might complain
        # It's safer to use the original image's dtype for the output.
        img_profile_3m.update(dtype=img_data_3m.dtype)
        with rio.open(final_masked_output_path, 'w', **img_profile_3m) as dst:
            dst.write(masked_img_data_3m)
        print(f"Final masked 3m image saved to: {final_masked_output_path}")
        return str(final_masked_output_path)
    except Exception as e:
        print(f"Error applying 3m mask to 3m image: {str(e)}")
        return None
def main():
    """
    CLI entry point: run the full OCM cloud-masking pipeline on one image.

    Pipeline: (1) resample the 3m input to 10m, (2) run OmniCloudMask on the
    10m image, (3) upsample the 10m mask back to 3m, (4) apply the 3m mask
    to the original image. Steps that fail print an error and return early
    (exit code stays 0 — intentional? confirm with callers/CI).
    """
    parser = argparse.ArgumentParser(description="Process PlanetScope 3m imagery with OmniCloudMask.")
    parser.add_argument("input_3m_image", type=str, help="Path to the input merged 3m PlanetScope GeoTIFF image.")
    parser.add_argument("output_dir", type=str, help="Directory to save processed files (10m image, masks, final 3m masked image).")
    args = parser.parse_args()
    try:
        # Resolve paths to absolute paths immediately; strict=True raises
        # FileNotFoundError when either path does not exist.
        input_3m_path = Path(args.input_3m_image).resolve(strict=True)
        # output_base_dir is the directory where outputs will be saved.
        # It should exist when the script is called (created by the notebook).
        output_base_dir = Path(args.output_dir).resolve(strict=True)
    except FileNotFoundError as e:
        print(f"Error: Path resolution failed. Input image or output base directory may not exist or is not accessible: {e}")
        return
    except Exception as e:
        print(f"Error resolving paths: {e}")
        return
    # The check for input_3m_path.exists() is now covered by resolve(strict=True)
    # Define intermediate and final file paths using absolute base paths
    intermediate_dir = output_base_dir / "intermediate_ocm_files"
    intermediate_dir.mkdir(parents=True, exist_ok=True)
    image_10m_path = intermediate_dir / f"{input_3m_path.stem}_10m.tif"
    # OCM mask (10m) will be saved inside run_ocm_on_image, in a subdir of intermediate_dir
    ocm_mask_output_dir = intermediate_dir / "ocm_10m_mask_output"
    # Upsampled OCM mask (3m)
    mask_3m_upsampled_path = intermediate_dir / f"{input_3m_path.stem}_ocm_mask_3m_upsampled.tif"
    # Final masked image (3m) — the only artifact written directly to output_base_dir
    final_masked_3m_path = output_base_dir / f"{input_3m_path.stem}_ocm_masked_3m.tif"
    print(f"--- Starting OCM processing for {input_3m_path.name} ---")
    print(f"Input 3m image (absolute): {input_3m_path}")
    print(f"Output base directory (absolute): {output_base_dir}")
    print(f"Intermediate 10m image path: {image_10m_path}")
    # 1. Resample 3m input to 10m for OCM
    try:
        resample_image(input_3m_path, image_10m_path, resolution=(10, 10))
    except Exception as e:
        print(f"Failed to resample to 10m: {e}")
        return
    # 2. Run OCM on the 10m image
    mask_10m_generated_path, _ = run_ocm_on_image(image_10m_path, ocm_mask_output_dir)
    if not mask_10m_generated_path:
        print("OCM processing failed. Exiting.")
        return
    # 3. Upsample the 10m OCM mask to 3m
    try:
        upsample_mask_to_3m(mask_10m_generated_path, input_3m_path, mask_3m_upsampled_path)
    except Exception as e:
        print(f"Failed to upsample 10m OCM mask to 3m: {e}")
        return
    # 4. Apply the 3m upsampled mask to the original 3m image
    try:
        apply_3m_mask_to_3m_image(input_3m_path, mask_3m_upsampled_path, final_masked_3m_path)
    except Exception as e:
        print(f"Failed to apply 3m mask to 3m image: {e}")
        return
    print(f"--- Successfully completed OCM processing for {input_3m_path.name} ---")
    print(f"Final 3m masked output: {final_masked_3m_path}")
if __name__ == "__main__":
    # Run the pipeline only when the optional OCM dependency is present;
    # otherwise tell the user how to install it.
    if HAS_OCM:
        main()
    else:
        print("OmniCloudMask library is not installed. Please install it to run this script.")
        print("You can typically install it using: pip install omnicloudmask")

View file

@ -2,18 +2,17 @@
params: params:
ref: "word-styles-reference-var1.docx" ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx output_file: CI_report.docx
report_date: "2024-08-28" report_date: "2024-07-18"
data_dir: "Chemba" data_dir: "chemba"
mail_day: "Wednesday" mail_day: "Wednesday"
borders: TRUE borders: TRUE
use_breaks: FALSE
output: output:
# html_document: # html_document:
# toc: yes # toc: yes
# df_print: paged # df_print: paged
word_document: word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx") reference_docx: !expr file.path("word-styles-reference-var1.docx")
toc: yes toc: no
editor_options: editor_options:
chunk_output_type: console chunk_output_type: console
--- ---
@ -22,8 +21,7 @@ editor_options:
# Set up basic report parameters from input values # Set up basic report parameters from input values
report_date <- params$report_date report_date <- params$report_date
mail_day <- params$mail_day mail_day <- params$mail_day
borders <- params$borders borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out) # Environment setup notes (commented out)
# # Activeer de renv omgeving # # Activeer de renv omgeving
@ -38,26 +36,21 @@ use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum
# Configure knitr options # Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE) knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management # Load all packages at once with suppressPackageStartupMessages
library(here) suppressPackageStartupMessages({
library(here)
# Spatial data libraries library(sf)
library(sf) library(terra)
library(terra) library(exactextractr)
library(exactextractr) library(tidyverse)
# library(raster) - Removed as it's no longer maintained library(tmap)
library(lubridate)
# Data manipulation and visualization library(zoo)
library(tidyverse) # Includes dplyr, ggplot2, etc. library(rsample)
library(tmap) library(caret)
library(lubridate) library(randomForest)
library(zoo) library(CAST)
})
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions # Load custom utility functions
tryCatch({ tryCatch({
@ -127,9 +120,6 @@ if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_
today_minus_3 <- as.character(lubridate::ymd(today) - 14) today_minus_3 <- as.character(lubridate::ymd(today) - 14)
} }
# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())
# Calculate week numbers for previous weeks # Calculate week numbers for previous weeks
week_minus_1 <- week - 1 week_minus_1 <- week - 1
week_minus_2 <- week - 2 week_minus_2 <- week - 2
@ -217,7 +207,95 @@ tryCatch({
}) })
``` ```
`r subtitle_var` ```{r create_front_page_variables, include=FALSE}
# Create variables for the front page
farm_name <- stringr::str_to_title(gsub("_", " ", project_dir))
# Format dates for display
report_date_formatted <- format(as.Date(report_date), "%B %d, %Y")
current_year <- format(Sys.Date(), "%Y")
# Get total field count and area if available
tryCatch({
total_fields <- length(unique(AllPivots0$field))
total_area_ha <- round(sum(sf::st_area(AllPivots0)) / 10000, 1) # Convert to hectares
}, error = function(e) {
total_fields <- "N/A"
total_area_ha <- "N/A"
})
```
---
title: ""
---
```{=openxml}
<w:p>
<w:pPr>
<w:jc w:val="center"/>
<w:spacing w:after="720"/>
</w:pPr>
<w:r>
<w:rPr>
<w:sz w:val="48"/>
<w:b/>
</w:rPr>
<w:t>SUGARCANE CROP MONITORING REPORT</w:t>
</w:r>
</w:p>
```
<div style="text-align: center; margin-top: 2cm; margin-bottom: 2cm;">
**`r farm_name`**
**Chlorophyll Index Analysis**
Report Date: **`r report_date_formatted`**
---
</div>
<div style="margin-top: 3cm; margin-bottom: 2cm;">
## Report Summary
**Farm Location:** `r farm_name`
**Report Period:** Week `r week` of `r current_year`
**Data Source:** Planet Labs Satellite Imagery
**Analysis Type:** Chlorophyll Index (CI) Monitoring
**Field Coverage:**
- Total Fields Monitored: `r total_fields`
- Total Area: `r total_area_ha` hectares
**Report Generated:** `r format(Sys.Date(), "%B %d, %Y at %H:%M")`
---
## About This Report
This automated report provides weekly analysis of sugarcane crop health using satellite-derived Chlorophyll Index (CI) measurements. The analysis helps identify:
- Field-level crop health variations
- Weekly changes in crop vigor
- Areas requiring agricultural attention
- Growth patterns across different field sections
**Key Features:**
- High-resolution satellite imagery analysis
- Week-over-week change detection
- Individual field performance metrics
- Actionable insights for crop management
</div>
\pagebreak
<!-- Original content starts here -->
\pagebreak \pagebreak
# Explanation of the Report # Explanation of the Report
@ -253,34 +331,104 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
Use these insights to identify areas that may need irrigation, fertilization, or other interventions, and to track the effectiveness of your management practices over time. Use these insights to identify areas that may need irrigation, fertilization, or other interventions, and to track the effectiveness of your management practices over time.
\pagebreak \pagebreak
# Chlorophyll Index (CI) Overview Map - Current Week # RGB Satellite Image - Current Week (if available)
```{r render_ci_overview_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE} ```{r render_rgb_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
# Create overview chlorophyll index map # Check if RGB bands are available and create RGB map
tryCatch({ tryCatch({
# Base shape # Load the full raster to check available bands
map <- tmap::tm_shape(CI, unit = "m") full_raster <- terra::rast(path_to_week_current)
available_bands <- names(full_raster)
# Add raster layer with either breaks or continuous spectrum based on parameter # Check if RGB bands are available (look for red, green, blue or similar naming)
if (use_breaks) { rgb_bands_available <- any(grepl("red|Red|RED", available_bands, ignore.case = TRUE)) &&
map <- map + tmap::tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf), any(grepl("green|Green|GREEN", available_bands, ignore.case = TRUE)) &&
palette = "RdYlGn", any(grepl("blue|Blue|BLUE", available_bands, ignore.case = TRUE))
midpoint = NA,
legend.is.portrait = FALSE, # Alternative check for numbered bands that might be RGB (e.g., band_1, band_2, band_3)
title = "Chlorophyll Index (CI)") if (!rgb_bands_available && length(available_bands) >= 3) {
} else { # Check if we have at least 3 bands that could potentially be RGB
map <- map + tmap::tm_raster(palette = "RdYlGn", potential_rgb_bands <- grep("band_[1-3]|B[1-3]|[1-3]", available_bands, ignore.case = TRUE)
style = "cont", rgb_bands_available <- length(potential_rgb_bands) >= 3
midpoint = NA,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI)")
} }
if (rgb_bands_available) {
safe_log("RGB bands detected - creating RGB visualization")
# Try to extract RGB bands (prioritize named bands first)
red_band <- NULL
green_band <- NULL
blue_band <- NULL
# Look for named RGB bands first
red_candidates <- grep("red|Red|RED", available_bands, ignore.case = TRUE, value = TRUE)
green_candidates <- grep("green|Green|GREEN", available_bands, ignore.case = TRUE, value = TRUE)
blue_candidates <- grep("blue|Blue|BLUE", available_bands, ignore.case = TRUE, value = TRUE)
if (length(red_candidates) > 0) red_band <- red_candidates[1]
if (length(green_candidates) > 0) green_band <- green_candidates[1]
if (length(blue_candidates) > 0) blue_band <- blue_candidates[1]
# Fallback to numbered bands if named bands not found
if (is.null(red_band) || is.null(green_band) || is.null(blue_band)) {
if (length(available_bands) >= 3) {
# Assume first 3 bands are RGB (common convention)
red_band <- available_bands[1]
green_band <- available_bands[2]
blue_band <- available_bands[3]
}
}
if (!is.null(red_band) && !is.null(green_band) && !is.null(blue_band)) {
# Extract RGB bands
rgb_raster <- c(full_raster[[red_band]], full_raster[[green_band]], full_raster[[blue_band]])
names(rgb_raster) <- c("red", "green", "blue")
# Create RGB map
map <- tmap::tm_shape(rgb_raster, unit = "m") +
tmap::tm_rgb() +
tmap::tm_scalebar(position = c("right", "bottom"), text.color = "white") +
tmap::tm_compass(position = c("right", "bottom"), text.color = "white") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "white", lwd = 2) +
tmap::tm_text("sub_field", size = 0.6, col = "white") +
tmap::tm_layout(main.title = paste0("RGB Satellite Image - Week ", week),
main.title.size = 0.8,
main.title.color = "black")
# Print the map
print(map)
safe_log("RGB map created successfully")
} else {
safe_log("Could not identify RGB bands despite detection", "WARNING")
cat("RGB bands detected but could not be properly identified. Skipping RGB visualization.\n")
}
} else {
safe_log("No RGB bands available in the current week mosaic")
cat("**Note:** RGB satellite imagery is not available for this week. Only spectral index data is available.\n\n")
}
}, error = function(e) {
safe_log(paste("Error creating RGB map:", e$message), "ERROR")
cat("**Note:** Could not create RGB visualization for this week.\n\n")
})
```
\pagebreak
# Chlorophyll Index (CI) Overview Map - Current Week
```{r render_ci_overview_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
# Create overview chlorophyll index map
tryCatch({ # Base shape
map <- tmap::tm_shape(CI, unit = "m") # Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison)
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
limits = c(1, 8)),
col.legend = tm_legend(title = "Chlorophyll Index (CI)",
orientation = "landscape",
position = tm_pos_out("center", "bottom")))
# Complete the map with layout and other elements # Complete the map with layout and other elements
map <- map + tmap::tm_layout(legend.outside = TRUE, map <- map +
legend.outside.position = "bottom", tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
legend.show = TRUE) + tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) + tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") + tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black") tmap::tm_text("sub_field", size = 0.6, col = "black")
@ -293,36 +441,24 @@ tryCatch({
text(1, 1, "Error creating CI overview map", cex=1.5) text(1, 1, "Error creating CI overview map", cex=1.5)
}) })
``` ```
\newpage \pagebreak
# Weekly Chlorophyll Index Difference Map # Weekly Chlorophyll Index Difference Map
```{r render_ci_difference_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE} ```{r render_ci_difference_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
# Create chlorophyll index difference map # Create chlorophyll index difference map
tryCatch({ tryCatch({ # Base shape
# Base shape map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") # Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale)
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
midpoint = 0,
# Add raster layer with either breaks or continuous spectrum based on parameter limits = c(-3, 3)),
if (use_breaks) { col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
map <- map + tmap::tm_raster(breaks = c(-3,-2,-1,0,1,2,3), orientation = "landscape",
palette = "RdYlGn", position = tm_pos_out("center", "bottom")))
midpoint = 0,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI) Change")
} else {
map <- map + tmap::tm_raster(palette = "RdYlGn",
style = "cont",
midpoint = 0,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI) Change")
}
# Complete the map with layout and other elements # Complete the map with layout and other elements
map <- map + tmap::tm_layout(legend.outside = TRUE, map <- map +
legend.outside.position = "bottom", tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
legend.show = TRUE) + tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) + tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") + tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black") tmap::tm_text("sub_field", size = 0.6, col = "black")
@ -335,8 +471,8 @@ tryCatch({
text(1, 1, "Error creating CI difference map", cex=1.5) text(1, 1, "Error creating CI difference map", cex=1.5)
}) })
``` ```
\newpage \pagebreak
\newpage
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'} ```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
# Generate detailed visualizations for each field # Generate detailed visualizations for each field
@ -347,7 +483,7 @@ tryCatch({
dplyr::summarise(.groups = 'drop') dplyr::summarise(.groups = 'drop')
# Generate plots for each field # Generate plots for each field
purrr::walk(AllPivots_merged$field, function(field_name) { purrr::walk(AllPivots_merged$field[1:5], function(field_name) {
tryCatch({ tryCatch({
cat("\n") # Add an empty line for better spacing cat("\n") # Add an empty line for better spacing
@ -358,14 +494,12 @@ tryCatch({
current_ci = CI, current_ci = CI,
ci_minus_1 = CI_m1, ci_minus_1 = CI_m1,
ci_minus_2 = CI_m2, ci_minus_2 = CI_m2,
last_week_diff = last_week_dif_raster_abs, last_week_diff = last_week_dif_raster_abs, three_week_diff = three_week_dif_raster_abs,
three_week_diff = three_week_dif_raster_abs,
harvesting_data = harvesting_data, harvesting_data = harvesting_data,
week = week, week = week,
week_minus_1 = week_minus_1, week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2, week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3, week_minus_3 = week_minus_3,
use_breaks = use_breaks,
borders = borders borders = borders
) )
@ -513,7 +647,7 @@ tryCatch({
# Predict yields for the current season (focus on mature fields over 300 days) # Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 300) %>% dplyr::filter(Age_days > 1) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations") safe_log("Successfully completed yield prediction calculations")
@ -565,3 +699,7 @@ tryCatch({
}) })
``` ```
\pagebreak

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 180 KiB

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -32,12 +32,12 @@ main <- function() {
end_date <- as.Date(args[1]) end_date <- as.Date(args[1])
if (is.na(end_date)) { if (is.na(end_date)) {
warning("Invalid end_date provided. Using default (current date).") warning("Invalid end_date provided. Using default (current date).")
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" #end_date <- "2023-10-01"
} }
} else { } else {
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" #end_date <- "2023-10-01"
} }
# Process offset argument # Process offset argument
@ -58,6 +58,10 @@ main <- function() {
project_dir <- "chemba" project_dir <- "chemba"
} }
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# 3. Initialize project configuration # 3. Initialize project configuration
# -------------------------------- # --------------------------------
new_project_question <- FALSE new_project_question <- FALSE
@ -68,8 +72,10 @@ main <- function() {
}, error = function(e) { }, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.") warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source(here::here("r_app", "parameters_project.R"))
source("r_app/ci_extraction_utils.R") source(here::here("r_app", "ci_extraction_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) { }, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.") stop("Failed to source required files from both default and 'r_app' directories.")
}) })
@ -106,6 +112,6 @@ main <- function() {
}) })
} }
# Run the main function if the script is executed directly if (sys.nframe() == 0) {
main() main()
}

View file

@ -0,0 +1,572 @@
# CI_EXTRACTION_AND_YIELD_PREDICTION.R
# =====================================
#
# This standalone script demonstrates:
# 1. How Chlorophyll Index (CI) is extracted from satellite imagery
# 2. How yield prediction is performed based on CI values
#
# Created for sharing with colleagues to illustrate the core functionality
# of the SmartCane monitoring system.
#
# -----------------------------
# PART 1: LIBRARY DEPENDENCIES
# -----------------------------
suppressPackageStartupMessages({
# Spatial data processing
library(sf)
library(terra)
library(exactextractr)
# Data manipulation
library(tidyverse)
library(lubridate)
library(here)
# Machine learning for yield prediction
library(rsample)
library(caret)
library(randomForest)
library(CAST)
})
# ----------------------------------
# PART 2: LOGGING & UTILITY FUNCTIONS
# ----------------------------------
#' Emit a timestamped log line through R's condition system
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  line <- paste0("[", stamp, "][", level, "] ", message)
  # ERROR/WARNING are surfaced as R warnings; everything else as a message
  if (!level %in% c("ERROR", "WARNING")) {
    message(line)
  } else {
    warning(line)
  }
}
#' Build the date window used to filter imagery for one reporting period
#'
#' @param end_date The end date for the sequence (Date object)
#' @param offset Number of days to look back from end_date
#' @return A list containing week number, year, and a sequence of dates for filtering
#'
date_list <- function(end_date, offset) {
  # Coerce/validate the end date
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }
  # Validate the window length
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }
  # The window includes end_date itself, so step back offset - 1 days
  start_date <- end_date - lubridate::days(offset - 1)
  # Dates are formatted YYYY-MM-DD for consistent filename filtering
  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")
  safe_log(paste("Date range generated from", start_date, "to", end_date))
  list(
    "week" = lubridate::week(start_date),
    "year" = lubridate::year(start_date),
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  )
}
# -----------------------------
# PART 3: CI EXTRACTION PROCESS
# -----------------------------
#' Find satellite imagery files within a specific date range
#'
#' @param image_folder Path to the folder containing satellite images
#' @param date_filter Vector of dates to filter by (in YYYY-MM-DD format)
#' @return Vector of file paths matching the date filter (possibly empty)
#'
find_satellite_images <- function(image_folder, date_filter) {
  # Validate inputs
  if (!dir.exists(image_folder)) {
    stop(paste("Image folder not found:", image_folder))
  }
  # List all TIF files anywhere under the folder
  all_files <- list.files(image_folder, pattern = "\\.tif$", full.names = TRUE, recursive = TRUE)
  if (length(all_files) == 0) {
    safe_log("No TIF files found in the specified directory", "WARNING")
    return(character(0))
  }
  # Filenames embed dates without dashes (YYYYMMDD), so match on that form.
  # lapply + unlist avoids growing a vector inside the loop (quadratic copies),
  # and fixed = TRUE makes the intent (literal substring match) explicit.
  per_date <- lapply(date_filter, function(d) {
    date_pattern <- gsub("-", "", d)
    matching_files <- all_files[grepl(date_pattern, all_files, fixed = TRUE)]
    if (length(matching_files) > 0) {
      safe_log(paste("Found", length(matching_files), "files for date", d))
    }
    matching_files
  })
  matches <- unlist(per_date, use.names = FALSE)
  # unlist() of an empty list yields NULL; callers expect a character vector
  if (is.null(matches)) character(0) else matches
}
#' Compute a Chlorophyll Index (CI) layer from a 4-band raster
#'
#' @param raster_obj A SpatRaster object with Red, Green, Blue, and NIR bands
#' @return A single-layer SpatRaster named "CI", clamped to [0, 10]
#'
calculate_ci <- function(raster_obj) {
  # All four bands must be present before indexing them
  if (terra::nlyr(raster_obj) < 4) {
    stop("Raster must have at least 4 bands (Red, Green, Blue, NIR)")
  }
  # Band order convention in this pipeline: 1 = Blue, 2 = Green, 3 = Red, 4 = NIR
  red <- raster_obj[[3]]
  nir <- raster_obj[[4]]
  # CI = (NIR / Red) - 1 highlights chlorophyll content in vegetation
  ci <- (nir / red) - 1
  # Clamp extremes produced by the ratio (e.g. near-zero red values)
  ci[ci > 10] <- 10
  ci[ci < 0] <- 0
  names(ci) <- "CI"
  ci
}
#' Build a binary clear-sky mask from simple band thresholds
#'
#' @param raster_obj A SpatRaster object with multiple bands
#' @return A binary mask where 1=clear pixel, 0=cloudy or shadow pixel
#'
create_cloud_mask <- function(raster_obj) {
  # Band order convention: 1 = Blue, 2 = Green, 3 = Red, 4 = NIR
  b <- raster_obj[[1]]
  g <- raster_obj[[2]]
  r <- raster_obj[[3]]
  n <- raster_obj[[4]]
  # Start from an all-valid mask with the same geometry as the input
  clear <- b * 0 + 1
  # Derived indices shared by both detectors
  ndvi <- (n - r) / (n + r)
  brightness <- (b + g + r) / 3
  # --- Cloud criteria ---
  # Clouds are bright in every visible band ...
  is_bright <- (b > 0.3) & (g > 0.3) & (r > 0.3)
  # ... and additionally either blue-dominant (snow / thick cloud) or low-NDVI
  is_cloud <- is_bright & ((b > (r * 1.2)) | (ndvi < 0.1))
  # --- Shadow criteria ---
  # Shadows are dark overall and either NIR-poor or blue-enhanced
  blue_vs_nir <- b / (n + 0.01)  # small constant avoids division by zero
  is_shadow <- (brightness < 0.1) & ((n < 0.15) | (blue_vs_nir > 0.8))
  # Flag clouds and shadows as invalid (0); everything else stays 1
  clear[is_cloud | is_shadow] <- 0
  clear
}
#' Derive a cloud-masked CI raster from one satellite scene
#'
#' @param file Path to the satellite image file
#' @param field_boundaries Field boundaries vector object
#' @param output_dir Directory to save the processed raster
#' @return Path to the processed raster file, or NULL if processing failed
#'
process_satellite_image <- function(file, field_boundaries, output_dir) {
  # Fail fast on bad inputs
  if (!file.exists(file)) {
    stop(paste("File not found:", file))
  }
  if (is.null(field_boundaries)) {
    stop("Field boundaries are required but were not provided")
  }
  # Output path: <input basename>_CI.tif inside output_dir
  out_name <- paste0(tools::file_path_sans_ext(basename(file)), "_CI.tif")
  output_file <- here::here(output_dir, out_name)
  tryCatch({
    scene <- terra::rast(file)
    # Compute CI and zero-out cloudy/shadowed pixels in one step
    ci_clean <- calculate_ci(scene) * create_cloud_mask(scene)
    # Crop to the field extent to keep the output small
    ci_cropped <- terra::crop(ci_clean, terra::ext(field_boundaries))
    terra::writeRaster(ci_cropped, output_file, overwrite = TRUE)
    safe_log(paste("Successfully processed", basename(file)))
    output_file
  }, error = function(e) {
    safe_log(paste("Error processing", basename(file), ":", e$message), "ERROR")
    NULL
  })
}
#' Summarise CI values per field polygon
#'
#' @param ci_raster A SpatRaster with CI values
#' @param field_boundaries An sf object with field polygons
#' @return A data frame with per-field CI statistics, stamped with today's date
#'
extract_ci_by_field <- function(ci_raster, field_boundaries) {
  # Fail fast on missing inputs
  if (is.null(ci_raster)) {
    stop("CI raster is required but was NULL")
  }
  if (is.null(field_boundaries) || nrow(field_boundaries) == 0) {
    stop("Field boundaries are required but were empty")
  }
  # Coverage-weighted zonal statistics per polygon
  stats_df <- exactextractr::exact_extract(
    ci_raster,
    field_boundaries,
    fun = c("mean", "median", "min", "max", "stdev", "count"),
    progress = FALSE
  )
  # Attach identifiers; fall back to the field name when no sub_field exists
  stats_df$field <- field_boundaries$field
  stats_df$sub_field <- if ("sub_field" %in% names(field_boundaries)) {
    field_boundaries$sub_field
  } else {
    field_boundaries$field
  }
  # Stamp each row with the extraction date
  stats_df$date <- Sys.Date()
  # Strip the "CI." prefix exact_extract puts on multi-stat column names
  names(stats_df) <- gsub("CI\\.", "", names(stats_df))
  stats_df
}
# -----------------------------------------
# PART 4: YIELD PREDICTION IMPLEMENTATION
# -----------------------------------------
#' Join CI accumulations to harvest records and split train/predict sets
#'
#' @param ci_data Data frame with cumulative CI values
#' @param harvest_data Data frame with harvest information
#' @return A list with `train` (rows with known yield) and `predict` (rows without)
#'
prepare_yield_prediction_data <- function(ci_data, harvest_data) {
  # One row per sub_field/season: keep the latest observation (max DOY)
  joined <- dplyr::left_join(ci_data, harvest_data, by = c("field", "sub_field", "season")) %>%
    dplyr::group_by(sub_field, season) %>%
    dplyr::slice(which.max(DOY)) %>%
    dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
    dplyr::mutate(CI_per_day = cumulative_CI / DOY)
  joined <- as.data.frame(joined)
  # Known yields train the model; rows with missing yields are predicted later
  list(
    train = dplyr::filter(joined, !is.na(tonnage_ha)),
    predict = dplyr::filter(joined, is.na(tonnage_ha))
  )
}
#' Fit a random-forest yield model with forward feature selection
#'
#' @param training_data Data frame with training data
#' @param predictors Vector of predictor variable names
#' @param response Name of the response variable
#' @return Trained model
#'
train_yield_model <- function(training_data,
                              predictors = c("cumulative_CI", "DOY", "CI_per_day"),
                              response = "tonnage_ha") {
  # 5-fold CV, keeping per-fold predictions for later inspection
  cv_ctrl <- caret::trainControl(
    method = "cv",
    savePredictions = TRUE,
    allowParallel = TRUE,
    number = 5,
    verboseIter = TRUE
  )
  # Fixed seed so feature selection and CV splits are reproducible
  set.seed(202)
  # Forward feature selection (CAST::ffs) wrapped around a random forest
  CAST::ffs(
    training_data[, predictors],
    training_data[, response],
    method = "rf",
    trControl = cv_ctrl,
    importance = TRUE,
    withinSE = TRUE,
    tuneLength = 5,
    na.rm = TRUE
  )
}
#' Format predictions into a clean data frame
#'
#' @param predictions Raw prediction results
#' @param newdata Original data frame with field information
#' @return Formatted predictions data frame
#'
prepare_predictions <- function(predictions, newdata) {
  # NOTE(review): the rename below relies on the magrittr pipe deparsing the
  # placeholder "." as the column name produced by as.data.frame(). Do not
  # refactor this to a plain as.data.frame(predictions) call without also
  # updating the rename target.
  return(predictions %>%
    as.data.frame() %>%
    dplyr::rename(predicted_Tcha = ".") %>%
    dplyr::mutate(
      # Carry identifiers and covariates over from newdata; row order is
      # assumed to match the prediction vector - TODO confirm with callers
      sub_field = newdata$sub_field,
      field = newdata$field,
      Age_days = newdata$DOY,
      total_CI = round(newdata$cumulative_CI, 0),
      predicted_Tcha = round(predicted_Tcha, 0),
      season = newdata$season
    ) %>%
    dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
    # Re-attach the remaining newdata columns (e.g. cumulative_CI, DOY)
    dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
  )
}
#' Predict yields for mature fields
#'
#' @param model Trained model
#' @param prediction_data Data frame with fields to predict
#' @param min_age Minimum age in days to qualify as mature (default: 300)
#' @return Data frame with yield predictions for fields older than min_age
#'
predict_yields <- function(model, prediction_data, min_age = 300) {
  # Raw model output, one value per row of prediction_data
  raw <- stats::predict(model, newdata = prediction_data)
  # Tidy the output, keep only mature fields, and add a per-day CI rate
  formatted <- prepare_predictions(raw, prediction_data)
  mature <- dplyr::filter(formatted, Age_days > min_age)
  dplyr::mutate(mature, CI_per_day = round(total_CI / Age_days, 1))
}
# ------------------------------
# PART 5: DEMONSTRATION WORKFLOW
# ------------------------------
#' Demonstration workflow showing how to use the functions
#'
#' Runs the full pipeline end to end: build the date window, load field
#' boundaries, find and process imagery, extract per-field CI statistics,
#' and (when harvest data is available) train a model and predict yields.
#'
#' @param end_date The end date for processing satellite images
#' @param offset Number of days to look back
#' @param image_folder Path to the folder containing satellite images
#' @param field_boundaries_path Path to field boundaries shapefile
#' @param output_dir Path to save processed outputs
#' @param harvest_data_path Path to historical harvest data
#' @return A list with `ci_stats`, plus `yield_predictions` and `model` when
#'   training data was available
#'
demo_workflow <- function(end_date = Sys.Date(), offset = 7,
                          image_folder = "path/to/satellite/images",
                          field_boundaries_path = "path/to/field_boundaries.shp",
                          output_dir = "path/to/output",
                          harvest_data_path = "path/to/harvest_data.csv") {
  # Step 1: Generate date list for processing
  dates <- date_list(end_date, offset)
  safe_log(paste("Processing data for week", dates$week, "of", dates$year))
  # Step 2: Load field boundaries
  field_boundaries <- sf::read_sf(field_boundaries_path)
  safe_log(paste("Loaded", nrow(field_boundaries), "field boundaries"))
  # Step 3: Find satellite images for the specified date range
  image_files <- find_satellite_images(image_folder, dates$days_filter)
  safe_log(paste("Found", length(image_files), "satellite images for processing"))
  # Step 4: Process each satellite image and calculate CI
  # (failed scenes return NULL and are silently skipped)
  ci_files <- list()
  for (file in image_files) {
    ci_file <- process_satellite_image(file, field_boundaries, output_dir)
    if (!is.null(ci_file)) {
      ci_files <- c(ci_files, ci_file)
    }
  }
  # Step 5: Extract CI statistics for each field
  ci_stats_list <- list()
  for (ci_file in ci_files) {
    ci_raster <- terra::rast(ci_file)
    ci_stats <- extract_ci_by_field(ci_raster, field_boundaries)
    ci_stats_list[[basename(ci_file)]] <- ci_stats
  }
  # Combine all stats
  all_ci_stats <- dplyr::bind_rows(ci_stats_list)
  safe_log(paste("Extracted CI statistics for", nrow(all_ci_stats), "field-date combinations"))
  # Step 6: Prepare for yield prediction
  if (file.exists(harvest_data_path)) {
    # Load harvest data
    harvest_data <- read.csv(harvest_data_path)
    safe_log("Loaded harvest data for yield prediction")
    # Make up cumulative_CI data for demonstration purposes
    # In a real scenario, this would come from accumulating CI values over time
    # NOTE(review): DOY here is the observation count, not a true day-of-year;
    # confirm downstream code treats it the same way.
    ci_data <- all_ci_stats %>%
      dplyr::group_by(field, sub_field) %>%
      dplyr::summarise(
        cumulative_CI = sum(mean, na.rm = TRUE),
        DOY = n(), # Days of year as the count of observations
        season = lubridate::year(max(date, na.rm = TRUE)),
        .groups = "drop"
      )
    # Prepare data for modeling
    modeling_data <- prepare_yield_prediction_data(ci_data, harvest_data)
    if (nrow(modeling_data$train) > 0) {
      # Train yield prediction model
      yield_model <- train_yield_model(modeling_data$train)
      safe_log("Trained yield prediction model")
      # Predict yields for mature fields
      yield_predictions <- predict_yields(yield_model, modeling_data$predict)
      safe_log(paste("Generated yield predictions for", nrow(yield_predictions), "fields"))
      # Return results
      return(list(
        ci_stats = all_ci_stats,
        yield_predictions = yield_predictions,
        model = yield_model
      ))
    } else {
      safe_log("No training data available for yield prediction", "WARNING")
      return(list(ci_stats = all_ci_stats))
    }
  } else {
    safe_log("Harvest data not found, skipping yield prediction", "WARNING")
    return(list(ci_stats = all_ci_stats))
  }
}
# ------------------------------
# PART 6: USAGE EXAMPLE
# ------------------------------
# Uncomment and modify paths to run the demo workflow
# results <- demo_workflow(
# end_date = "2023-10-01",
# offset = 7,
# image_folder = "data/satellite_images",
# field_boundaries_path = "data/field_boundaries.shp",
# output_dir = "output/processed",
# harvest_data_path = "data/harvest_history.csv"
# )
#
# # Access results
# ci_stats <- results$ci_stats
# yield_predictions <- results$yield_predictions
#
# # Example: Plot CI distribution by field
# if (require(ggplot2)) {
# ggplot(ci_stats, aes(x = field, y = mean, fill = field)) +
# geom_boxplot() +
# labs(title = "CI Distribution by Field",
# x = "Field",
# y = "Mean CI") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
# }
#
# # Example: Plot predicted yield vs age
# if (exists("yield_predictions") && require(ggplot2)) {
# ggplot(yield_predictions, aes(x = Age_days, y = predicted_Tcha, color = field)) +
# geom_point(size = 3) +
# geom_text(aes(label = field), hjust = -0.2, vjust = -0.2) +
# labs(title = "Predicted Yield by Field Age",
# x = "Age (Days)",
# y = "Predicted Yield (Tonnes/ha)") +
# theme_minimal()
# }

View file

@ -0,0 +1,556 @@
# Cloud and Shadow Detection Analysis
# This script analyzes cloud and shadow detection parameters using the diagnostic GeoTIFF files
# and polygon-based classification to help optimize the detection algorithms
# Load required packages
library(terra)
library(sf)
library(dplyr)
library(ggplot2)
library(reshape2)
library(exactextractr) # For accurate polygon extraction
# Define diagnostic directory
diagnostic_dir <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/cloud_mask_diagnostics_20250515-164357"
# Simple logging function for this standalone script
# Write a single log line to the console.
#
# @param message Text to log.
# @param level Severity label printed in brackets (default "INFO").
# @return NULL, invisibly (the return value of cat()).
safe_log <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
safe_log("Starting cloud detection analysis on diagnostic rasters")
# Load all diagnostic rasters
safe_log("Loading diagnostic raster files...")
# Load original bands
red_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_red_band.tif"))
green_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_green_band.tif"))
blue_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_blue_band.tif"))
nir_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_nir_band.tif"))
# Load derived indices
brightness <- terra::rast(file.path(diagnostic_dir, "diagnostic_brightness.tif"))
ndvi <- terra::rast(file.path(diagnostic_dir, "diagnostic_ndvi.tif"))
blue_ratio <- terra::rast(file.path(diagnostic_dir, "diagnostic_blue_ratio.tif"))
green_nir_ratio <- terra::rast(file.path(diagnostic_dir, "diagnostic_green_nir_ratio.tif"))
ndwi <- terra::rast(file.path(diagnostic_dir, "diagnostic_ndwi.tif"))
# Load cloud detection parameters
bright_pixels <- terra::rast(file.path(diagnostic_dir, "param_bright_pixels.tif"))
very_bright_pixels <- terra::rast(file.path(diagnostic_dir, "param_very_bright_pixels.tif"))
blue_dominant <- terra::rast(file.path(diagnostic_dir, "param_blue_dominant.tif"))
low_ndvi <- terra::rast(file.path(diagnostic_dir, "param_low_ndvi.tif"))
green_dominant_nir <- terra::rast(file.path(diagnostic_dir, "param_green_dominant_nir.tif"))
high_ndwi <- terra::rast(file.path(diagnostic_dir, "param_high_ndwi.tif"))
# Load shadow detection parameters
dark_pixels <- terra::rast(file.path(diagnostic_dir, "param_dark_pixels.tif"))
very_dark_pixels <- terra::rast(file.path(diagnostic_dir, "param_very_dark_pixels.tif"))
low_nir <- terra::rast(file.path(diagnostic_dir, "param_low_nir.tif"))
shadow_ndvi <- terra::rast(file.path(diagnostic_dir, "param_shadow_ndvi.tif"))
low_red_to_blue <- terra::rast(file.path(diagnostic_dir, "param_low_red_to_blue.tif"))
high_blue_to_nir_ratio <- terra::rast(file.path(diagnostic_dir, "param_high_blue_to_nir_ratio.tif"))
blue_nir_ratio_raw <- terra::rast(file.path(diagnostic_dir, "param_blue_nir_ratio_raw.tif"))
red_blue_ratio_raw <- terra::rast(file.path(diagnostic_dir, "param_red_blue_ratio_raw.tif"))
# Load edge detection parameters
brightness_focal_sd <- terra::rast(file.path(diagnostic_dir, "param_brightness_focal_sd.tif"))
edge_pixels <- terra::rast(file.path(diagnostic_dir, "param_edge_pixels.tif"))
# Load final masks
cloud_mask <- terra::rast(file.path(diagnostic_dir, "mask_cloud.tif"))
shadow_mask <- terra::rast(file.path(diagnostic_dir, "mask_shadow.tif"))
combined_mask <- terra::rast(file.path(diagnostic_dir, "mask_combined.tif"))
dilated_mask <- terra::rast(file.path(diagnostic_dir, "mask_dilated.tif"))
safe_log("Raster data loaded successfully")
# ------------------------------------------------------------------
# Polygon-based spectral analysis of the diagnostic rasters.
# NOTE(review): the original text of this section was corrupted by a
# merge (two versions interleaved line-by-line; it referenced an
# undefined `valid_class_ids` and would not parse). The logic below is
# reconstructed from the intact upper portion and the cleaned
# standalone copy of this script.
# ------------------------------------------------------------------
tryCatch({
  # Prefer a classes.geojson next to the rasters; otherwise fall back
  # to the newest sibling cloud_mask_diagnostics_* folder that has one.
  classes_file <- file.path(diagnostic_dir, "classes.geojson")
  if (!file.exists(classes_file)) {
    potential_dirs <- list.dirs(path = dirname(diagnostic_dir),
                                full.names = TRUE,
                                recursive = FALSE)
    diagnostic_dirs <- potential_dirs[grepl("cloud_mask_diagnostics_", potential_dirs)]
    for (dir in rev(sort(diagnostic_dirs))) { # reverse sort: newest first
      potential_file <- file.path(dir, "classes.geojson")
      if (file.exists(potential_file)) {
        classes_file <- potential_file
        break
      }
    }
  }

  if (file.exists(classes_file)) {
    safe_log(paste("Using classification polygons from:", classes_file))
    # Hand-drawn training polygons; the 'type' attribute holds the
    # land-cover class label, renamed to 'class' for consistency.
    classifications <- sf::st_read(classes_file, quiet = TRUE) %>% rename(class = type)
    classifications <- classifications[!sf::st_is_empty(classifications), ]

    # Layers to summarise per polygon: raw bands, derived indices,
    # boolean detection parameters (coerced to 0/1), raw ratios, and
    # the final masks.
    extraction_rasters <- list(
      red = red_band,
      green = green_band,
      blue = blue_band,
      nir = nir_band,
      brightness = brightness,
      ndvi = ndvi,
      blue_ratio = blue_ratio,
      green_nir_ratio = green_nir_ratio,
      ndwi = ndwi,
      bright_pixels = terra::ifel(bright_pixels, 1, 0),
      very_bright_pixels = terra::ifel(very_bright_pixels, 1, 0),
      blue_dominant = terra::ifel(blue_dominant, 1, 0),
      low_ndvi = terra::ifel(low_ndvi, 1, 0),
      green_dominant_nir = terra::ifel(green_dominant_nir, 1, 0),
      high_ndwi = terra::ifel(high_ndwi, 1, 0),
      dark_pixels = terra::ifel(dark_pixels, 1, 0),
      very_dark_pixels = terra::ifel(very_dark_pixels, 1, 0),
      low_nir = terra::ifel(low_nir, 1, 0),
      shadow_ndvi = terra::ifel(shadow_ndvi, 1, 0),
      low_red_to_blue = terra::ifel(low_red_to_blue, 1, 0),
      high_blue_to_nir_ratio = terra::ifel(high_blue_to_nir_ratio, 1, 0),
      blue_nir_ratio_raw = (blue_band / (nir_band + 0.01)),
      red_blue_ratio_raw = (red_band / (blue_band + 0.01)),
      brightness_focal_sd = brightness_focal_sd,
      edge_pixels = terra::ifel(edge_pixels, 1, 0),
      cloud_mask = terra::ifel(cloud_mask, 1, 0),
      shadow_mask = terra::ifel(shadow_mask, 1, 0),
      combined_mask = terra::ifel(combined_mask, 1, 0),
      dilated_mask = terra::ifel(dilated_mask, 1, 0)
    )
    extraction_stack <- terra::rast(extraction_rasters)

    # One row per polygon; exact_extract() names columns "mean.<layer>".
    all_stats <- cbind(
      classifications,
      round(exactextractr::exact_extract(extraction_stack, classifications,
                                         fun = "mean", progress = FALSE), 2)
    ) %>%
      sf::st_drop_geometry()

    # Derive class_name from class only when it is genuinely missing.
    if (!("class_name" %in% colnames(all_stats)) && ("class" %in% colnames(all_stats))) {
      all_stats$class_name <- all_stats$class
    }

    if (nrow(all_stats) > 0) {
      stats_file <- file.path(diagnostic_dir, "class_spectral_stats_mean.csv")
      write.csv(all_stats, stats_file, row.names = FALSE)
      safe_log(paste("Saved MEAN spectral statistics by class to:", stats_file))
    } else {
      safe_log("No statistics were generated to save.", "WARNING")
    }

    # --- Optimal separation thresholds per class pair ---------------
    # For each (target, reference) pair and each mean.* parameter, scan
    # 20 evenly spaced cut points and keep the most accurate one.
    class_pairs <- list(
      c("cloud", "crop"),
      c("cloud", "bare_soil_dry"),
      c("cloud", "bare_soil_wet"),
      c("shadow_over_crop", "crop"),
      c("shadow_over_bare_soil", "bare_soil_dry"),
      c("shadow_over_bare_soil", "bare_soil_wet")
    )
    params_to_check <- grep("^mean\\.", colnames(all_stats), value = TRUE)

    threshold_rows <- list()  # collect rows; bind once (no rbind-in-loop)
    for (pair in class_pairs) {
      target_class <- pair[1]
      vs_class <- pair[2]
      for (param in params_to_check) {
        # which() keeps NA class_name rows out of the subset
        target_values <- all_stats[which(all_stats$class_name == target_class), param]
        vs_values <- all_stats[which(all_stats$class_name == vs_class), param]
        target_values <- target_values[!is.na(target_values)]
        vs_values <- vs_values[!is.na(vs_values)]
        if (length(target_values) == 0 || length(vs_values) == 0) next

        # Higher values indicate the target class when its mean is larger.
        direction <- if (mean(target_values) > mean(vs_values)) ">" else "<"
        rng <- range(c(target_values, vs_values))
        if (!all(is.finite(rng)) || rng[1] == rng[2]) next
        potential_thresholds <- seq(rng[1], rng[2], length.out = 20)

        best_accuracy <- -1
        best_threshold <- potential_thresholds[1]
        for (threshold in potential_thresholds) {
          if (direction == ">") {
            n_correct <- sum(target_values > threshold) + sum(vs_values <= threshold)
          } else {
            n_correct <- sum(target_values < threshold) + sum(vs_values >= threshold)
          }
          accuracy <- n_correct / (length(target_values) + length(vs_values))
          if (accuracy > best_accuracy) {
            best_accuracy <- accuracy
            best_threshold <- threshold
          }
        }
        threshold_rows[[length(threshold_rows) + 1]] <- data.frame(
          parameter = sub("^mean\\.", "", param),
          best_threshold = best_threshold,
          direction = direction,
          target_class = target_class,
          vs_class = vs_class,
          accuracy = best_accuracy,
          stringsAsFactors = FALSE
        )
      }
    }
    if (length(threshold_rows) > 0) {
      threshold_results <- do.call(rbind, threshold_rows)
      thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
      write.csv(threshold_results, thresholds_file, row.names = FALSE)
      safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
    } else {
      safe_log("No thresholds could be derived (insufficient class data).", "WARNING")
    }

    # --- Box plots of key parameters by class -----------------------
    if (requireNamespace("ggplot2", quietly = TRUE) && nrow(all_stats) > 0) {
      mean_cols <- grep("^mean\\.", colnames(all_stats), value = TRUE)
      if (length(mean_cols) > 0) {
        plot_data <- reshape2::melt(all_stats,
                                    id.vars = c("class", "class_name"),
                                    measure.vars = mean_cols,
                                    variable.name = "parameter",
                                    value.name = "value")
        # Strip the "mean." prefix for readable axis/facet labels.
        plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter)

        plots_dir <- file.path(diagnostic_dir, "class_plots")
        dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)

        key_params_plot <- intersect(
          c("brightness", "ndvi", "blue_ratio", "ndwi",
            "blue_nir_ratio_raw", "red_blue_ratio_raw"),
          unique(plot_data$parameter)
        )
        for (param in key_params_plot) {
          param_data <- plot_data[plot_data$parameter == param, ]
          if (nrow(param_data) == 0) next
          p <- ggplot2::ggplot(param_data,
                               ggplot2::aes(x = class_name, y = value, fill = class_name)) +
            ggplot2::geom_boxplot() +
            ggplot2::theme_minimal() +
            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
            ggplot2::labs(
              title = paste("Distribution of", param, "by Land Cover Class"),
              x = "Class",
              y = param,
              fill = "Class"
            )
          ggplot2::ggsave(file.path(plots_dir, paste0("boxplot_", param, ".png")),
                          p, width = 10, height = 6, dpi = 150)
        }

        # Faceted summary of the four most discriminative parameters.
        summary_data <- plot_data[plot_data$parameter %in%
                                    c("brightness", "ndvi",
                                      "blue_nir_ratio_raw", "red_blue_ratio_raw"), ]
        if (nrow(summary_data) > 0) {
          p <- ggplot2::ggplot(summary_data,
                               ggplot2::aes(x = class_name, y = value, fill = class_name)) +
            ggplot2::geom_boxplot() +
            ggplot2::facet_wrap(~parameter, scales = "free_y") +
            ggplot2::theme_minimal() +
            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
            ggplot2::labs(
              title = "Key Spectral Parameters by Land Cover Class",
              x = "Class",
              y = "Value",
              fill = "Class"
            )
          ggplot2::ggsave(file.path(plots_dir, "spectral_parameters_summary.png"),
                          p, width = 12, height = 8, dpi = 150)
        }
        safe_log(paste("Generated spectral parameter plots in:", plots_dir))
      }
    }
  } else {
    safe_log("No classification polygons file (classes.geojson) found. Skipping spectral analysis.", "WARNING")
  }
}, error = function(e) {
  safe_log(paste("Error in classification polygon processing or spectral analysis:", e$message), "ERROR")
})
safe_log("Cloud detection analysis script finished.")
# NOTE(review): the original ended with rm(list = ls()); that clears the
# caller's entire workspace when this script is sourced, so it was dropped.

View file

@ -0,0 +1,191 @@
```r
# Cloud detection analysis script
# Load necessary libraries
library(terra)
library(exactextractr)
library(sf)
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2)
# Define file paths (these should be set to your actual file locations)
classes_file <- "path/to/classes.geojson"
rasters_dir <- "path/to/rasters"
diagnostic_dir <- "path/to/diagnostics"
# Helper function for logging
# Emit one timestamped log line to stdout.
#
# @param message Text to log.
# @param level Severity label printed in brackets (default "INFO").
# @return NULL, invisibly (the return value of cat()).
safe_log <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(sprintf("[%s] [%s] %s\n", stamp, level, message))
}
# Main processing block
# Load the hand-labelled classification polygons.
safe_log(paste("Loading classification polygons from:", classes_file))
classifications <- sf::st_read(classes_file, quiet = TRUE)
# Only assign a CRS when none is present. The previous code set EPSG:4326
# unconditionally, which silently mislabels data that already carries a
# (possibly projected) CRS.
if (is.na(sf::st_crs(classifications))) {
  safe_log("No CRS found for the classifications. Setting to WGS84 (EPSG:4326).", "WARNING")
  sf::st_crs(classifications) <- 4326
}
# List all raster files in the directory
raster_files <- list.files(rasters_dir, pattern = "\\.tif$", full.names = TRUE)
# Name each layer after its file base name so extracted columns are readable
extraction_rasters <- setNames(raster_files, tools::file_path_sans_ext(basename(raster_files)))
# Create a stack of all rasters
extraction_stack <- terra::rast(extraction_rasters)
# Mean of every layer within every polygon (one row per polygon);
# exact_extract() names the resulting columns "mean.<layer>".
safe_log("Extracting mean statistics per polygon using exactextractr...")
all_stats <- cbind(
  classifications,
  round(exactextractr::exact_extract(extraction_stack, classifications, fun = "mean", progress = FALSE), 2)
) %>%
  sf::st_drop_geometry() # Ensures all_stats is a data frame
# Derive 'class_name' from 'class' only when it is genuinely missing.
# The previous unconditional assignment clobbered any existing column,
# contradicting its own "if not" comment.
if (!("class_name" %in% colnames(all_stats)) && ("class" %in% colnames(all_stats))) {
  all_stats$class_name <- all_stats$class
}
# Save the extracted statistics to a CSV file
stats_file <- file.path(diagnostic_dir, "polygon_mean_spectral_stats.csv")
write.csv(all_stats, stats_file, row.names = FALSE)
safe_log(paste("Saved mean spectral statistics per polygon to:", stats_file))
# ------------------------------------------------------------------
# Derive candidate cloud/shadow detection thresholds.
# For each (target, reference) class pair and each spectral parameter,
# scan 20 evenly spaced cut points between the pooled min and max and
# keep the one that best separates the two classes.
# Fixes vs previous version: rows are collected in a list and bound
# once (no O(n^2) rbind-in-loop), the dead target_sd/vs_sd computation
# is removed, scalar ifelse() is replaced by if/else, and the duplicate
# "mean.brightness" entry no longer produces duplicate result rows.
# ------------------------------------------------------------------
class_pairs <- list(
  c("cloud", "crop"),
  c("cloud", "bare_soil_dry"),
  c("cloud", "bare_soil_wet"),
  c("shadow_over_crop", "crop"),
  c("shadow_over_bare_soil", "bare_soil_dry"),
  c("shadow_over_bare_soil", "bare_soil_wet")
)
# Keep only parameters that actually exist as columns in all_stats.
cloud_detection_params_for_threshold <- intersect(
  c("mean.brightness", "mean.very_bright_pixels", "mean.blue_dominant", "mean.low_ndvi", "mean.green_dominant_nir", "mean.high_ndwi", "mean.blue_ratio", "mean.ndvi"),
  colnames(all_stats)
)
shadow_detection_params_for_threshold <- intersect(
  c("mean.brightness", "mean.dark_pixels", "mean.very_dark_pixels", "mean.low_nir", "mean.shadow_ndvi", "mean.low_red_to_blue", "mean.high_blue_to_nir_ratio", "mean.blue_nir_ratio_raw", "mean.red_blue_ratio_raw"),
  colnames(all_stats)
)
# Hoisted out of the pair loop (it never changes); unique() drops the
# "mean.brightness" overlap between the two lists.
params_to_check <- unique(c(cloud_detection_params_for_threshold,
                            shadow_detection_params_for_threshold))

threshold_rows <- list()
for (pair in class_pairs) {
  target_class <- pair[1]
  vs_class <- pair[2]
  for (param in params_to_check) {
    target_values <- all_stats[all_stats$class_name == target_class, param]
    vs_values <- all_stats[all_stats$class_name == vs_class, param]
    target_values <- target_values[!is.na(target_values)]
    vs_values <- vs_values[!is.na(vs_values)]
    # Only proceed if both groups have at least one value
    if (length(target_values) == 0 || length(vs_values) == 0) next

    # Higher values indicate the target class when its mean is larger.
    direction <- if (mean(target_values) > mean(vs_values)) ">" else "<"
    all_values <- c(target_values, vs_values)
    min_val <- min(all_values)
    max_val <- max(all_values)
    # Only scan when the pooled range is finite and non-degenerate.
    if (!is.finite(min_val) || !is.finite(max_val) || min_val == max_val) next

    potential_thresholds <- seq(min_val, max_val, length.out = 20)
    best_accuracy <- -1
    best_threshold <- if (direction == ">") min(potential_thresholds) else max(potential_thresholds)
    for (threshold in potential_thresholds) {
      if (direction == ">") {
        correct_target <- sum(target_values > threshold)
        correct_vs <- sum(vs_values <= threshold)
      } else {
        correct_target <- sum(target_values < threshold)
        correct_vs <- sum(vs_values >= threshold)
      }
      accuracy <- (correct_target + correct_vs) / (length(target_values) + length(vs_values))
      if (accuracy > best_accuracy) {
        best_accuracy <- accuracy
        best_threshold <- threshold
      }
    }
    threshold_rows[[length(threshold_rows) + 1]] <- data.frame(
      parameter = param,
      best_threshold = best_threshold,
      direction = direction,
      target_class = target_class,
      vs_class = vs_class,
      accuracy = best_accuracy,
      stringsAsFactors = FALSE
    )
  }
}
# Bind once; preserve the empty-frame schema when nothing was found.
threshold_results <- if (length(threshold_rows) > 0) {
  do.call(rbind, threshold_rows)
} else {
  data.frame(
    parameter = character(),
    best_threshold = numeric(),
    direction = character(),
    target_class = character(),
    vs_class = character(),
    accuracy = numeric(),
    stringsAsFactors = FALSE
  )
}
thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
write.csv(threshold_results, thresholds_file, row.names = FALSE)
safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
# Fix: get plot_measure_cols by matching raster base names to all_stats columns with 'mean.' prefix
plot_measure_cols <- intersect(names(extraction_rasters), gsub('^mean\\.', '', colnames(all_stats)))
# Long format: one row per (polygon, parameter) for ggplot faceting/boxplots.
plot_data <- reshape2::melt(
all_stats,
id.vars = c("class", "class_name"),
measure.vars = paste0("mean.", plot_measure_cols),
variable.name = "parameter",
value.name = "value"
)
# Remove 'mean.' prefix from parameter column for clarity
plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter)
plots_dir <- file.path(diagnostic_dir, "class_plots")
dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)
# Per-parameter box plots: only parameters that were actually extracted.
key_params_for_plot_list <- c("brightness", "ndvi", "blue_ratio", "ndwi",
"blue_nir_ratio_raw", "red_blue_ratio_raw")
key_params_to_plot <- intersect(key_params_for_plot_list, plot_measure_cols)
for (param_to_plot in key_params_to_plot) {
param_data_subset <- plot_data[plot_data$parameter == param_to_plot, ]
# One box per land-cover class, saved as boxplot_<param>.png.
p <- ggplot2::ggplot(param_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
ggplot2::geom_boxplot() +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
ggplot2::labs(
title = paste("Distribution of", param_to_plot, "by Land Cover Class"),
x = "Class",
y = param_to_plot,
fill = "Class"
)
plot_file <- file.path(plots_dir, paste0("boxplot_", param_to_plot, ".png"))
ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150)
}
# Faceted summary figure combining the most discriminative parameters.
summary_params_for_plot_list <- c("brightness", "ndvi",
"blue_nir_ratio_raw", "red_blue_ratio_raw")
summary_params_to_plot <- intersect(summary_params_for_plot_list, plot_measure_cols)
summary_data_subset <- plot_data[plot_data$parameter %in% summary_params_to_plot,]
p_summary <- ggplot2::ggplot(summary_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
ggplot2::geom_boxplot() +
ggplot2::facet_wrap(~parameter, scales = "free_y") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
strip.text = ggplot2::element_text(size = 8)) +
ggplot2::labs(
title = "Summary of Key Spectral Parameters by Land Cover Class",
x = "Class",
y = "Value",
fill = "Class"
)
summary_file <- file.path(plots_dir, "spectral_parameters_summary.png")
ggplot2::ggsave(summary_file, p_summary, width = 12, height = 8, dpi = 150)
safe_log(paste("Generated spectral parameter plots in:", plots_dir))
safe_log("Cloud detection analysis script finished.")
```

View file

@ -0,0 +1,718 @@
---
params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2024-08-28"
data_dir: "Chemba"
mail_day: "Wednesday"
borders: TRUE
use_breaks: FALSE
output:
# html_document:
# toc: yes
# df_print: paged
word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx")
toc: yes
editor_options:
chunk_output_type: console
---
```{r setup_parameters, include=FALSE}
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
# NOTE(review): `use_breaks` looks obsolete now that the maps use continuous
# scales -- confirm no downstream chunk reads it before removing it and the
# matching YAML entry.
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activate the renv environment
# renv::activate()
# renv::deactivate()
# # Optional: restore the environment if needed
# # You can comment this out if you normally do not want to run it
# renv::restore()
```
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
# Load all packages and project utility scripts used by the report.
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
# (presumably this is where safe_log() and get_week_path() are defined --
# verify against report_utils.R)
tryCatch({
source("report_utils.R")
}, error = function(e) {
message(paste("Error loading report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "report_utils.R"))
}, error = function(e) {
stop("Could not load report_utils.R from either location: ", e$message)
})
})
# Load executive report utilities
tryCatch({
source("executive_report_utils.R")
}, error = function(e) {
message(paste("Error loading executive_report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "executive_report_utils.R"))
}, error = function(e) {
stop("Could not load executive_report_utils.R from either location: ", e$message)
})
})
safe_log("Successfully loaded utility functions")
```
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
# (parameters_project.R presumably reads `project_dir` and defines paths such
# as cumulative_CI_vals_dir / weekly_CI_mosaic used below -- TODO confirm)
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Log initial configuration
safe_log("Starting the R Markdown script")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
# Set locale so weekday names are always produced in English
Sys.setlocale("LC_TIME", "C")

# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)

# Weekday of the report date, used to decide whether this report belongs to
# the current or the next mailing week
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

# Calculate initial week number
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))

# Dates of the three preceding report weeks, used for comparisons
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)

# Log the weekday calculations for debugging
safe_log(paste("Report date weekday:", report_date_as_week_day))
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
safe_log(paste("Mail day:", mail_day_as_character))
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))

# If the report date already lies past this week's mail day, the report
# counts towards the next week and the comparison dates shift by one week.
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
  safe_log("Adjusting weeks because of mail day")
  week <- lubridate::week(today) + 1
  today_minus_1 <- as.character(lubridate::ymd(today))
  today_minus_2 <- as.character(lubridate::ymd(today) - 7)
  today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}

# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())

# Derive the previous week numbers from the actual comparison dates instead
# of plain subtraction: `week - 1` yields 0 (or negative values) at the start
# of a year, whereas lubridate::week() of the shifted date rolls over to week
# 52/53 of the previous year, matching the per-date years computed below.
week_minus_1 <- lubridate::week(today_minus_1)
week_minus_2 <- lubridate::week(today_minus_2)
week_minus_3 <- lubridate::week(today_minus_3)

# Format current week with leading zeros (e.g. "07") for labels/file names
week <- sprintf("%02d", week)

# Calendar year of each comparison date (these may differ around New Year)
year <- lubridate::year(today)
year_1 <- lubridate::year(today_minus_1)
year_2 <- lubridate::year(today_minus_2)
year_3 <- lubridate::year(today_minus_3)
```
```{r data, message=TRUE, warning=TRUE, include=FALSE}
# Load CI index data with error handling
# (tryCatch evaluates its expression in this chunk's environment, so the
# objects assigned below persist after the call)
tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
stop("Error loading CI quadrant data: ", e$message)
})
# Get file paths for different weeks using the utility function
tryCatch({
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
# Log the calculated paths
safe_log("Required mosaic paths:")
safe_log(paste("Path to current week:", path_to_week_current))
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
# Validate that files exist
# (a missing file only warns here; the terra::rast() calls below are what
# actually fail and trigger the stop() in the error handler)
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
# Load raster data with terra functions; only the "CI" layer is kept
CI <- terra::rast(path_to_week_current)$CI
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
}, error = function(e) {
stop("Error loading raster data: ", e$message)
})
```
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
# Calculate difference rasters for the weekly and three-week comparisons.
# On failure, zero-valued placeholders with the geometry of CI are installed
# so downstream mapping chunks still have objects to work with.
tryCatch({
  # Calculate weekly difference
  last_week_dif_raster_abs <- (CI - CI_m1)
  safe_log("Calculated weekly difference raster")
  # Calculate three-week difference
  three_week_dif_raster_abs <- (CI - CI_m3)
  safe_log("Calculated three-week difference raster")
}, error = function(e) {
  safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
  # `<<-` is required here: a plain `<-` inside the error handler only
  # creates variables local to the handler function, so the placeholders
  # would be silently discarded when the handler returns.
  if (!exists("last_week_dif_raster_abs")) {
    last_week_dif_raster_abs <<- CI * 0
  }
  if (!exists("three_week_dif_raster_abs")) {
    three_week_dif_raster_abs <<- CI * 0
  }
})
```
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
# Load field boundaries from parameters
# `field_boundaries_sf` is expected to be defined by parameters_project.R
# (sourced earlier); the alias AllPivots0 is used throughout the report.
tryCatch({
AllPivots0 <- field_boundaries_sf
safe_log("Successfully loaded field boundaries")
}, error = function(e) {
stop("Error loading field boundaries: ", e$message)
})
```
```{r create_farm_health_data, message=FALSE, warning=FALSE, include=FALSE}
# Create farm health summary data from scratch
# Builds one row per field with mean CI, week-over-week CI change, CI
# uniformity, a status label, an anomaly classification, a priority level
# (1 = most urgent, 5 = none) and a harvest readiness stage. The result is
# `farm_health_data`, sorted by priority then field name.
tryCatch({
# Ensure we have the required data
if (!exists("AllPivots0") || !exists("CI") || !exists("CI_m1") || !exists("harvesting_data")) {
stop("Required input data (field boundaries, CI data, or harvesting data) not available")
}
safe_log("Starting to calculate farm health data")
# Get unique field names
fields <- unique(AllPivots0$field)
safe_log(paste("Found", length(fields), "unique fields"))
# Initialize result dataframe
farm_health_data <- data.frame(
field = character(),
mean_ci = numeric(),
ci_change = numeric(),
ci_uniformity = numeric(),
status = character(),
anomaly_type = character(),
priority_level = numeric(),
age_weeks = numeric(),
harvest_readiness = character(),
stringsAsFactors = FALSE
)
# Process each field with robust error handling
# NOTE(review): rows are appended with rbind() inside the loop, which is
# quadratic; fine for dozens of fields, but consider accumulating in a list
# and binding once if the field count grows.
for (field_name in fields) {
tryCatch({
safe_log(paste("Processing field:", field_name))
# Get field boundary
field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
# Skip if field shape is empty
if (nrow(field_shape) == 0) {
safe_log(paste("Empty field shape for", field_name), "WARNING")
next
}
# Get field age from harvesting data - use direct filtering to avoid dplyr errors
# (assumes harvesting_data has columns field, season_start, age -- TODO confirm)
field_age_data <- NULL
if (exists("harvesting_data") && !is.null(harvesting_data) && nrow(harvesting_data) > 0) {
field_age_data <- harvesting_data[harvesting_data$field == field_name, ]
if (nrow(field_age_data) > 0) {
# Keep only the most recent season for this field
field_age_data <- field_age_data[order(field_age_data$season_start, decreasing = TRUE), ][1, ]
}
}
# Default age if not available
field_age_weeks <- if (!is.null(field_age_data) && nrow(field_age_data) > 0 && !is.na(field_age_data$age)) {
field_age_data$age
} else {
10 # Default age
}
# Extract CI values using terra's extract function which is more robust
ci_values <- terra::extract(CI, field_shape)
ci_prev_values <- terra::extract(CI_m1, field_shape)
# Check if we got valid data
if (nrow(ci_values) == 0 || nrow(ci_prev_values) == 0) {
safe_log(paste("No CI data extracted for field", field_name), "WARNING")
# Add a placeholder row with Unknown status
farm_health_data <- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = NA,
ci_change = NA,
ci_uniformity = NA,
status = "Unknown",
anomaly_type = "Unknown",
priority_level = 5, # Low priority
age_weeks = field_age_weeks,
harvest_readiness = "Unknown",
stringsAsFactors = FALSE
))
next
}
# Calculate metrics - Handle NA values properly
# NOTE(review): the fallback picks the first extracted column, which for
# terra::extract() is typically the ID column rather than a value column --
# verify whether this branch can ever trigger and if colnames(...)[2] is meant.
ci_column <- if ("CI" %in% names(ci_values)) "CI" else colnames(ci_values)[1]
ci_prev_column <- if ("CI" %in% names(ci_prev_values)) "CI" else colnames(ci_prev_values)[1]
mean_ci <- mean(ci_values[[ci_column]], na.rm=TRUE)
mean_ci_prev <- mean(ci_prev_values[[ci_prev_column]], na.rm=TRUE)
ci_change <- mean_ci - mean_ci_prev
ci_sd <- sd(ci_values[[ci_column]], na.rm=TRUE)
ci_uniformity <- ci_sd / max(0.1, mean_ci) # Avoid division by zero
# Handle NaN or Inf results
if (is.na(mean_ci) || is.na(ci_change) || is.na(ci_uniformity) ||
is.nan(mean_ci) || is.nan(ci_change) || is.nan(ci_uniformity) ||
is.infinite(mean_ci) || is.infinite(ci_change) || is.infinite(ci_uniformity)) {
safe_log(paste("Invalid calculation results for field", field_name), "WARNING")
# Add a placeholder row with Unknown status
farm_health_data <- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = NA,
ci_change = NA,
ci_uniformity = NA,
status = "Unknown",
anomaly_type = "Unknown",
priority_level = 5, # Low priority
age_weeks = field_age_weeks,
harvest_readiness = "Unknown",
stringsAsFactors = FALSE
))
next
}
# Determine field status
status <- dplyr::case_when(
mean_ci >= 5 ~ "Excellent",
mean_ci >= 3.5 ~ "Good",
mean_ci >= 2 ~ "Fair",
mean_ci >= 1 ~ "Poor",
TRUE ~ "Critical"
)
# Determine anomaly type
anomaly_type <- dplyr::case_when(
ci_change > 2 ~ "Potential Weed Growth",
ci_change < -2 ~ "Potential Weeding/Harvesting",
ci_uniformity > 0.5 ~ "High Variability",
mean_ci < 1 ~ "Low Vigor",
TRUE ~ "None"
)
# Calculate priority level (1-5, with 1 being highest priority)
priority_score <- dplyr::case_when(
mean_ci < 1 ~ 1, # Critical - highest priority
anomaly_type == "Potential Weed Growth" ~ 2,
anomaly_type == "High Variability" ~ 3,
ci_change < -1 ~ 4,
TRUE ~ 5 # No urgent issues
)
# Determine harvest readiness
harvest_readiness <- dplyr::case_when(
field_age_weeks >= 52 & mean_ci >= 4 ~ "Ready for harvest",
field_age_weeks >= 48 & mean_ci >= 3.5 ~ "Approaching harvest",
field_age_weeks >= 40 & mean_ci >= 3 ~ "Mid-maturity",
field_age_weeks >= 12 ~ "Growing",
TRUE ~ "Early stage"
)
# Add to summary data
farm_health_data <- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = round(mean_ci, 2),
ci_change = round(ci_change, 2),
ci_uniformity = round(ci_uniformity, 2),
status = status,
anomaly_type = anomaly_type,
priority_level = priority_score,
age_weeks = field_age_weeks,
harvest_readiness = harvest_readiness,
stringsAsFactors = FALSE
))
}, error = function(e) {
safe_log(paste("Error processing field", field_name, ":", e$message), "ERROR")
# Add a placeholder row with Error status
# `<<-` is required: the handler runs in its own function scope, so a plain
# `<-` would not update the data frame in the chunk environment.
farm_health_data <<- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = NA,
ci_change = NA,
ci_uniformity = NA,
status = "Unknown",
anomaly_type = "Unknown",
priority_level = 5, # Low priority since we don't know the status
age_weeks = NA,
harvest_readiness = "Unknown",
stringsAsFactors = FALSE
))
})
}
# Make sure we have data for all fields
if (nrow(farm_health_data) == 0) {
safe_log("No farm health data was created", "ERROR")
stop("Failed to create farm health data")
}
# Sort by priority level
farm_health_data <- farm_health_data %>% dplyr::arrange(priority_level, field)
safe_log(paste("Successfully created farm health data for", nrow(farm_health_data), "fields"))
}, error = function(e) {
safe_log(paste("Error creating farm health data:", e$message), "ERROR")
# Create an empty dataframe that can be filled by the verification chunk
})
```
```{r verify_farm_health_data, message=FALSE, warning=FALSE, include=FALSE}
# Safety net: guarantee that `farm_health_data` exists and is non-empty
# before downstream chunks use it. If the main calculation produced nothing,
# fall back to a frame with one all-NA/"Unknown" row per field.
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")
  tryCatch({
    # Field names come straight from the boundary data
    field_names <- unique(AllPivots0$field)
    n_fields <- length(field_names)
    na_col <- rep(NA, n_fields)
    farm_health_data <- data.frame(
      field = field_names,
      mean_ci = na_col,
      ci_change = na_col,
      ci_uniformity = na_col,
      status = rep("Unknown", n_fields),
      anomaly_type = rep("Unknown", n_fields),
      priority_level = rep(5, n_fields), # Low priority
      age_weeks = na_col,
      harvest_readiness = rep("Unknown", n_fields),
      stringsAsFactors = FALSE
    )
    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # Last resort: a correctly typed, zero-row frame (note the `<<-`, the
    # handler runs in its own scope)
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
```
```{r calculate_farm_health, message=FALSE, warning=FALSE, include=FALSE}
# Calculate farm health summary metrics via the shared utility function.
# When it succeeds this intentionally replaces the frame built in the
# previous chunks; when it fails, any previously built data is kept.
tryCatch({
  # Generate farm health summary data
  farm_health_data <- generate_farm_health_summary(
    field_boundaries = AllPivots0,
    ci_current = CI,
    ci_previous = CI_m1,
    harvesting_data = harvesting_data
  )
  # Log the summary data
  safe_log(paste("Generated farm health summary with", nrow(farm_health_data), "fields"))
}, error = function(e) {
  safe_log(paste("Error in farm health calculation:", e$message), "ERROR")
  # `<<-` is required so the fallback persists outside this handler (a plain
  # `<-` would be handler-local and silently discarded). Only install the
  # empty frame when no earlier chunk produced data -- otherwise a failure
  # here would clobber the fallback built previously.
  if (!exists("farm_health_data")) {
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  }
})
```
```{r advanced_analytics_functions, message=FALSE, warning=FALSE, include=FALSE}
# ADVANCED ANALYTICS FUNCTIONS
# Note: These functions are now imported from executive_report_utils.R
# (sourced in the load_libraries chunk), so this chunk only records that fact.
# The utility file contains functions for velocity/acceleration indicators,
# anomaly timeline creation, age cohort mapping, and cohort performance charts
safe_log("Using analytics functions from executive_report_utils.R")
```
\pagebreak
# Advanced Analytics
## Field Health Velocity and Acceleration
This visualization shows the rate of change in field health (velocity) and whether that change is speeding up or slowing down (acceleration). These metrics help identify if farm conditions are improving, stable, or deteriorating.
**How to interpret:**
- **Velocity gauge:** Shows the average weekly change in CI values across all fields
- Positive values (green/right side): Farm health improving week-to-week
- Negative values (red/left side): Farm health declining week-to-week
- **Acceleration gauge:** Shows whether the rate of change is increasing or decreasing
- Positive values (green/right side): Change is accelerating or improving faster
- Negative values (red/left side): Change is decelerating or slowing down
- **4-Week Trend:** Shows the overall CI value trajectory for the past month
```{r render_velocity_acceleration, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Render the farm-wide velocity/acceleration indicators and tables of the
# fields with the largest week-over-week CI changes.
tryCatch({
  # Create and display the indicators using the imported utility function
  velocity_plot <- create_velocity_acceleration_indicator(
    health_data = farm_health_data,
    ci_current = CI,
    ci_prev1 = CI_m1,
    ci_prev2 = CI_m2,
    ci_prev3 = CI_m3,
    field_boundaries = AllPivots0
  )
  # Print the visualization
  print(velocity_plot)

  # Per-field velocity metrics (current vs. previous week mean CI)
  field_ci_metrics <- list()
  fields <- unique(AllPivots0$field)
  for (field_name in fields) {
    tryCatch({
      # Get field boundary
      field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
      if (nrow(field_shape) == 0) next
      # Extract CI values
      ci_curr_values <- terra::extract(CI, field_shape)
      ci_prev1_values <- terra::extract(CI_m1, field_shape)
      # Resolve the value column by name with a fallback, consistent with
      # the extraction handling in the farm-health chunk.
      ci_col <- if ("CI" %in% names(ci_curr_values)) "CI" else colnames(ci_curr_values)[1]
      ci_prev_col <- if ("CI" %in% names(ci_prev1_values)) "CI" else colnames(ci_prev1_values)[1]
      mean_ci_curr <- mean(ci_curr_values[[ci_col]], na.rm = TRUE)
      mean_ci_prev1 <- mean(ci_prev1_values[[ci_prev_col]], na.rm = TRUE)
      velocity <- mean_ci_curr - mean_ci_prev1
      # Store in list
      field_ci_metrics[[field_name]] <- list(
        field = field_name,
        ci_current = mean_ci_curr,
        ci_prev1 = mean_ci_prev1,
        velocity = velocity
      )
    }, error = function(e) {
      safe_log(paste("Error processing field", field_name, "for velocity table:", e$message), "WARNING")
    })
  }

  # Convert list to data frame; guard against the empty case --
  # do.call(rbind, ...) over an empty list returns NULL, which would make
  # the dplyr calls below error and hide the already-printed figure.
  velocity_df <- do.call(rbind, lapply(field_ci_metrics, function(x) {
    data.frame(
      field = x$field,
      ci_current = round(x$ci_current, 2),
      ci_prev1 = round(x$ci_prev1, 2),
      velocity = round(x$velocity, 2),
      direction = ifelse(x$velocity >= 0, "Improving", "Declining")
    )
  }))
  if (!is.null(velocity_df) && nrow(velocity_df) > 0) {
    # Select top 5 positive and top 5 negative velocity fields
    top_positive <- velocity_df %>%
      dplyr::filter(velocity > 0) %>%
      dplyr::arrange(desc(velocity)) %>%
      dplyr::slice_head(n = 5)
    top_negative <- velocity_df %>%
      dplyr::filter(velocity < 0) %>%
      dplyr::arrange(velocity) %>%
      dplyr::slice_head(n = 5)
    # Display the tables if we have data
    if (nrow(top_positive) > 0) {
      cat("<h4>Fields with Fastest Improvement</h4>")
      knitr::kable(top_positive %>%
        dplyr::select(Field = field,
                      `Current CI` = ci_current,
                      `Previous CI` = ci_prev1,
                      `Weekly Change` = velocity))
    }
    if (nrow(top_negative) > 0) {
      cat("<h4>Fields with Fastest Decline</h4>")
      knitr::kable(top_negative %>%
        dplyr::select(Field = field,
                      `Current CI` = ci_current,
                      `Previous CI` = ci_prev1,
                      `Weekly Change` = velocity))
    }
  }
}, error = function(e) {
  safe_log(paste("Error rendering velocity visualization:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating velocity visualization.</div>")
})
```
\pagebreak
## Field Anomaly Timeline
This visualization shows the history of detected anomalies in fields across the monitoring period. It helps identify persistent issues or improvements over time.
**How to interpret:**
- **X-axis**: Dates of satellite observations
- **Y-axis**: Fields grouped by similar characteristics
- **Colors**: Red indicates negative anomalies, green indicates positive anomalies
- **Size**: Larger markers indicate stronger anomalies
```{r anomaly_timeline, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Build and render the anomaly-history timeline; failures are logged and
# replaced with an inline HTML alert so the rest of the report still knits.
tryCatch(
  {
    # Helper imported from executive_report_utils.R; show 90 days of history
    anomaly_timeline <- create_anomaly_timeline(
      field_boundaries = AllPivots0,
      ci_data = CI_quadrant,
      days_to_include = 90
    )
    print(anomaly_timeline)
  },
  error = function(e) {
    safe_log(paste("Error generating anomaly timeline:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating anomaly timeline visualization.</div>")
  }
)
```
\pagebreak
## Field Age Cohorts Map
This map shows fields grouped by their crop age (weeks since planting). Understanding the distribution of crop ages helps interpret performance metrics and plan harvest scheduling.
**How to interpret:**
- **Colors**: Different colors represent different age groups (in weeks since planting)
- **Labels**: Each field is labeled with its name for easy reference
- **Legend**: Shows the age ranges in weeks and their corresponding colors
```{r age_cohort_map, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Build and render the field age cohort map; failures degrade to an inline
# HTML alert instead of stopping the knit.
tryCatch(
  {
    # Helper imported from executive_report_utils.R
    age_cohort_map <- create_age_cohort_map(
      field_boundaries = AllPivots0,
      harvesting_data = harvesting_data
    )
    print(age_cohort_map)
  },
  error = function(e) {
    safe_log(paste("Error generating age cohort map:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating age cohort map visualization.</div>")
  }
)
```
\pagebreak
## Cohort Performance Comparison
This visualization compares chlorophyll index (CI) performance across different age groups of fields. This helps identify if certain age groups are performing better or worse than expected.
**How to interpret:**
- **X-axis**: Field age groups in weeks since planting
- **Y-axis**: Average CI value for fields in that age group
- **Box plots**: Show the distribution of CI values within each age group
- **Line**: Shows the expected CI trajectory based on historical data
```{r cohort_performance_chart, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Build and render the cohort performance comparison; failures degrade to an
# inline HTML alert instead of stopping the knit.
tryCatch(
  {
    # Helper imported from executive_report_utils.R
    cohort_chart <- create_cohort_performance_chart(
      field_boundaries = AllPivots0,
      ci_current = CI,
      harvesting_data = harvesting_data
    )
    print(cohort_chart)
  },
  error = function(e) {
    safe_log(paste("Error generating cohort performance chart:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating cohort performance visualization.</div>")
  }
)
```

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,437 @@
# EXECUTIVE REPORT UTILITIES
# This file contains functions for creating advanced visualizations for the executive summary report
#' Create a velocity and acceleration indicator for CI change
#'
#' Builds a three-panel figure: a velocity gauge (mean weekly CI change), an
#' acceleration gauge (change in that velocity), and a 4-week trend line of
#' the farm-wide average CI.
#'
#' @param health_data Current farm health data
#' @param ci_current Current CI raster
#' @param ci_prev1 CI raster from 1 week ago
#' @param ci_prev2 CI raster from 2 weeks ago
#' @param ci_prev3 CI raster from 3 weeks ago
#' @param field_boundaries Field boundaries spatial data (sf object) with a `field` column
#' @return A grid object (gridExtra::grid.arrange) with velocity and
#'   acceleration gauges plus the trend chart, or a ggplot error placeholder
#'   when construction fails
#'
create_velocity_acceleration_indicator <- function(health_data, ci_current, ci_prev1, ci_prev2, ci_prev3, field_boundaries) {
  tryCatch({
    # Per-field mean CI for the current week and the three preceding weeks
    field_ci_metrics <- data.frame(field = character(),
                                   week_current = numeric(),
                                   week_minus_1 = numeric(),
                                   week_minus_2 = numeric(),
                                   week_minus_3 = numeric(),
                                   stringsAsFactors = FALSE)
    # Process each field
    fields <- unique(field_boundaries$field)
    for (field_name in fields) {
      tryCatch({
        # Get field boundary
        field_shape <- field_boundaries %>% dplyr::filter(field == field_name)
        if (nrow(field_shape) == 0) next
        # Extract CI values for all weeks
        ci_curr_values <- terra::extract(ci_current, field_shape)
        ci_prev1_values <- terra::extract(ci_prev1, field_shape)
        ci_prev2_values <- terra::extract(ci_prev2, field_shape)
        ci_prev3_values <- terra::extract(ci_prev3, field_shape)
        # NOTE(review): assumes the extracted value column is named "CI" --
        # confirm all weekly mosaics carry that layer name.
        mean_ci_curr <- mean(ci_curr_values$CI, na.rm = TRUE)
        mean_ci_prev1 <- mean(ci_prev1_values$CI, na.rm = TRUE)
        mean_ci_prev2 <- mean(ci_prev2_values$CI, na.rm = TRUE)
        mean_ci_prev3 <- mean(ci_prev3_values$CI, na.rm = TRUE)
        # Add to metrics table
        field_ci_metrics <- rbind(field_ci_metrics, data.frame(
          field = field_name,
          week_current = mean_ci_curr,
          week_minus_1 = mean_ci_prev1,
          week_minus_2 = mean_ci_prev2,
          week_minus_3 = mean_ci_prev3,
          stringsAsFactors = FALSE
        ))
      }, error = function(e) {
        message(paste("Error processing field", field_name, "for velocity indicator:", e$message))
      })
    }

    # Farm-wide averages per week
    farm_avg <- colMeans(field_ci_metrics[, c("week_current", "week_minus_1", "week_minus_2", "week_minus_3")], na.rm = TRUE)
    # Velocity: current vs. last week; acceleration: change in velocity
    velocity <- farm_avg["week_current"] - farm_avg["week_minus_1"]
    prev_velocity <- farm_avg["week_minus_1"] - farm_avg["week_minus_2"]
    acceleration <- velocity - prev_velocity

    # Data frames backing the two gauges
    velocity_data <- data.frame(
      label = "Weekly CI Change",
      value = velocity
    )
    acceleration_data <- data.frame(
      label = "Change Acceleration",
      value = acceleration
    )
    # 4-week farm-average trajectory
    trend_data <- data.frame(
      week = c(-3, -2, -1, 0),
      ci_value = c(farm_avg["week_minus_3"], farm_avg["week_minus_2"],
                   farm_avg["week_minus_1"], farm_avg["week_current"])
    )

    # Velocity gauge. geom_arc_bar() is exported by ggforce, not ggplot2 --
    # qualifying it as ggplot2::geom_arc_bar() fails at run time.
    velocity_gauge <- ggplot2::ggplot(velocity_data, ggplot2::aes(x = 0, y = 0)) +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2, end = pi/2,
        fill = "background"
      ), fill = "#f0f0f0") +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2,
        # Scale to range -1 to +1; values beyond that overshoot the dial
        end = -pi/2 + (pi * (0.5 + (velocity / 2))),
        fill = "velocity"
      ), fill = ifelse(velocity >= 0, "#1a9850", "#d73027")) +
      ggplot2::geom_text(ggplot2::aes(label = sprintf("%.2f", velocity)),
                         size = 8, fontface = "bold") +
      ggplot2::geom_text(ggplot2::aes(label = "Velocity"), y = -0.3, size = 4) +
      ggplot2::coord_fixed() +
      ggplot2::theme_void() +
      ggplot2::scale_fill_manual(values = c("background" = "#f0f0f0", "velocity" = "steelblue"),
                                 guide = "none") +
      ggplot2::annotate("text", x = -0.85, y = 0, label = "Declining",
                        angle = 90, size = 3.5) +
      ggplot2::annotate("text", x = 0.85, y = 0, label = "Improving",
                        angle = -90, size = 3.5) +
      ggplot2::labs(title = "Farm Health Velocity",
                    subtitle = "CI change per week") +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 12))

    # Acceleration gauge (same construction, scaled to -0.5 to +0.5)
    acceleration_gauge <- ggplot2::ggplot(acceleration_data, ggplot2::aes(x = 0, y = 0)) +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2, end = pi/2,
        fill = "background"
      ), fill = "#f0f0f0") +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2,
        end = -pi/2 + (pi * (0.5 + (acceleration / 1))),
        fill = "acceleration"
      ), fill = ifelse(acceleration >= 0, "#1a9850", "#d73027")) +
      ggplot2::geom_text(ggplot2::aes(label = sprintf("%.2f", acceleration)),
                         size = 8, fontface = "bold") +
      ggplot2::geom_text(ggplot2::aes(label = "Acceleration"), y = -0.3, size = 4) +
      ggplot2::coord_fixed() +
      ggplot2::theme_void() +
      ggplot2::scale_fill_manual(values = c("background" = "#f0f0f0", "acceleration" = "steelblue"),
                                 guide = "none") +
      ggplot2::annotate("text", x = -0.85, y = 0, label = "Slowing",
                        angle = 90, size = 3.5) +
      ggplot2::annotate("text", x = 0.85, y = 0, label = "Accelerating",
                        angle = -90, size = 3.5) +
      ggplot2::labs(title = "Change Acceleration",
                    subtitle = "Increasing or decreasing trend") +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 12))

    # Trend chart with a dashed reference line at the oldest week's value
    trend_chart <- ggplot2::ggplot(trend_data, ggplot2::aes(x = week, y = ci_value)) +
      ggplot2::geom_line(size = 1.5, color = "steelblue") +
      ggplot2::geom_point(size = 3, color = "steelblue") +
      ggplot2::geom_hline(yintercept = trend_data$ci_value[1], linetype = "dashed", color = "gray50") +
      ggplot2::labs(
        title = "4-Week CI Trend",
        x = "Weeks from current",
        y = "Average CI Value"
      ) +
      ggplot2::theme_minimal() +
      ggplot2::scale_x_continuous(breaks = c(-3, -2, -1, 0))

    # Combine into a multi-panel figure: gauges on top, trend below.
    # (The per-field top-velocity table previously computed here was never
    # used in the figure and has been dropped.)
    main_plot <- gridExtra::grid.arrange(
      gridExtra::grid.arrange(velocity_gauge, acceleration_gauge, ncol = 2),
      trend_chart,
      heights = c(1.5, 1),
      nrow = 2
    )
    return(main_plot)
  }, error = function(e) {
    message(paste("Error in create_velocity_acceleration_indicator:", e$message))
    # Fall back to an empty plot carrying the error text
    return(ggplot2::ggplot() +
      ggplot2::annotate("text", x = 0, y = 0, label = paste("Error creating velocity indicator:", e$message)) +
      ggplot2::theme_void())
  })
}
#' Generate a field health score based on CI values and trends
#'
#' Combines the field's current mean CI (judged against an age-based
#' expectation), the mean CI change, and CI uniformity into a 0-10 score.
#'
#' @param ci_current Current CI raster
#' @param ci_change CI change raster
#' @param field_age_weeks Field age in weeks
#' @return List containing score, status, and component scores
#'
generate_field_health_score <- function(ci_current, ci_change, field_age_weeks) {
  # Clamp a scalar into [lo, hi]; NA inputs stay NA.
  clamp <- function(v, lo, hi) max(lo, min(hi, v))

  # Field-wide summary statistics from the rasters
  avg_ci <- terra::global(ci_current, "mean", na.rm = TRUE)[[1]]
  avg_change <- terra::global(ci_change, "mean", na.rm = TRUE)[[1]]
  ci_spread <- terra::global(ci_current, "sd", na.rm = TRUE)[[1]]

  # Coefficient of variation; defaults to 1 (worst) for non-positive means
  variability <- ifelse(avg_ci > 0, ci_spread / avg_ci, 1)

  # Expectations grow linearly with crop age, capped at CI 5
  target_ci <- min(5, field_age_weeks / 10)

  # Component scores: closeness to target (0-5), trend (0-3), evenness (0-2)
  score_ci <- clamp(5 - 2 * abs(avg_ci - target_ci), 0, 5)
  score_trend <- clamp(1 + avg_change, 0, 3)
  score_evenness <- clamp(2 * (1 - variability), 0, 2)
  overall <- score_ci + score_trend + score_evenness

  # Map the numeric total onto a human-readable status label
  status <- dplyr::case_when(
    overall >= 8 ~ "Excellent",
    overall >= 6 ~ "Good",
    overall >= 4 ~ "Fair",
    overall >= 2 ~ "Needs Attention",
    TRUE ~ "Critical"
  )

  list(
    score = round(overall, 1),
    status = status,
    components = list(
      ci = round(score_ci, 1),
      change = round(score_trend, 1),
      uniformity = round(score_evenness, 1)
    )
  )
}
#' Create an irrigation recommendation map
#'
#' Classifies every raster cell into one of four irrigation priority zones
#' based on the current CI level and its recent change, then renders the
#' zones with tmap (v4 syntax).
#'
#' @param ci_current Current CI raster
#' @param ci_change CI change raster
#' @param field_shape Field boundary shape
#' @param title Map title
#' @return A tmap object with irrigation recommendations
#'
create_irrigation_map <- function(ci_current, ci_change, field_shape, title = "Irrigation Priority Zones") {
  # Start from a raster with the same geometry as the CI layer
  irrigation_priority <- ci_current * 0

  # Extract cell values for classification
  ci_values <- terra::values(ci_current)
  change_values <- terra::values(ci_change)

  # Priority zones:
  # 3 = High priority (low CI, negative trend)
  # 2 = Medium priority (low CI but stable, or good CI with negative trend)
  # 1 = Low priority (watch, good CI with slight decline)
  # 0 = No action needed (good CI, stable/positive trend)
  priority_values <- rep(NA, length(ci_values))
  valid <- !is.na(ci_values) & !is.na(change_values)

  # High priority: Low CI (< 2) and negative change (< 0)
  priority_values[which(valid & ci_values < 2 & change_values < 0)] <- 3
  # Medium priority: low CI with stable/positive change, or moderate CI (2-4)
  # with significant negative change (< -1)
  priority_values[which(valid & ((ci_values < 2 & change_values >= 0) |
    (ci_values >= 2 & ci_values < 4 & change_values < -1)))] <- 2
  # Low priority (watch): CI >= 2 with mild negative change (-1 to 0)
  priority_values[which(valid & ci_values >= 2 & change_values < 0 & change_values >= -1)] <- 1
  # No action needed: CI >= 2 with stable/positive change (>= 0)
  priority_values[which(valid & ci_values >= 2 & change_values >= 0)] <- 0

  # Write the classification back into the raster
  terra::values(irrigation_priority) <- priority_values

  # Create the map. tmap v4 syntax: the visual variable takes a tm_scale_*
  # object plus a tm_legend object instead of the retired style/palette/title
  # arguments; the main title is set with tm_title().
  tm_shape(irrigation_priority) +
    tm_raster(
      col.scale = tm_scale_categorical(
        values = c("#1a9850", "#91cf60", "#fc8d59", "#d73027"),
        labels = c("No Action", "Watch", "Medium Priority", "High Priority")
      ),
      col.legend = tm_legend(title = "Irrigation Need",
                             position = tm_pos_in("left", "bottom"))
    ) +
    tm_shape(field_shape) +
    tm_borders(lwd = 2) +
    tm_title(title)
}
#' Simple mock function to get weather data for a field
#' In a real implementation, this would fetch data from a weather API
#'
#' @param start_date Start date for weather data
#' @param end_date End date for weather data
#' @param lat Latitude of the field center (unused by the mock)
#' @param lon Longitude of the field center (unused by the mock)
#' @return A data frame with columns date, rainfall_mm, temp_max_c,
#'   temp_min_c, temp_mean_c (one row per day)
#'
get_weather_data <- function(start_date, end_date, lat = -16.1, lon = 34.7) {
  # Mock implementation - replace with a real API call (OpenWeatherMap, NOAA, ...)
  # in production.
  dates <- seq.Date(from = as.Date(start_date), to = as.Date(end_date), by = "day")
  n_days <- length(dates)
  # Southern-hemisphere seasonality: rainy season Nov-Apr
  month_nums <- as.numeric(format(dates, "%m"))
  is_rainy_season <- month_nums %in% c(11, 12, 1, 2, 3, 4)
  # Baseline rainfall: wetter in the rainy season, near-dry otherwise
  rainfall <- numeric(n_days)
  rainfall[is_rainy_season] <- pmax(0, rnorm(sum(is_rainy_season), mean = 4, sd = 8))
  rainfall[!is_rainy_season] <- pmax(0, rnorm(sum(!is_rainy_season), mean = 0.5, sd = 2))
  # Occasional heavy rainfall on ~10% of rainy-season days.
  # FIX: the previous sample(which(is_rainy_season), ...) errored when the date
  # range had no rainy-season days, and sample() on a length-1 numeric x draws
  # from 1:x (possibly picking a dry day). Index explicitly via sample.int().
  rainy_idx <- which(is_rainy_season)
  if (length(rainy_idx) > 0) {
    n_heavy <- max(1, round(length(rainy_idx) * 0.1))
    heavy_rain_days <- rainy_idx[sample.int(length(rainy_idx), size = n_heavy)]
    rainfall[heavy_rain_days] <- rainfall[heavy_rain_days] + runif(n_heavy, 20, 50)
  }
  # Temperatures: seasonal sine cycle plus daily noise around a +/-5 C spread
  temp_mean <- 18 + 8 * sin((month_nums - 1) * pi/6)
  temp_max <- temp_mean + rnorm(n_days, mean = 5, sd = 1)
  temp_min <- temp_mean - rnorm(n_days, mean = 5, sd = 1)
  data.frame(
    date = dates,
    rainfall_mm = round(rainfall, 1),
    temp_max_c = round(temp_max, 1),
    temp_min_c = round(temp_min, 1),
    temp_mean_c = round((temp_max + temp_min) / 2, 1)
  )
}
#' Creates a weather summary visualization integrated with CI data
#'
#' @param pivotName Name of the pivot field
#' @param ci_data CI quadrant data
#' @param days_to_show Number of days of weather to show
#' @return ggplot object
#'
create_weather_ci_plot <- function(pivotName, ci_data = CI_quadrant, days_to_show = 30) {
  # Observations for the requested field, ordered in time, with missing values dropped
  field_obs <- ci_data |>
    dplyr::filter(field == pivotName) |>
    dplyr::arrange(Date) |>
    dplyr::filter(!is.na(value))
  # Nothing to plot: return an empty placeholder panel
  if (nrow(field_obs) == 0) {
    empty_panel <- ggplot() +
      annotate("text", x = 0, y = 0, label = "No data available") +
      theme_void()
    return(empty_panel)
  }
  # Restrict to the trailing window ending at the latest observation
  window_end <- max(field_obs$Date, na.rm = TRUE)
  window_start <- window_end - days_to_show
  recent_obs <- dplyr::filter(field_obs, Date >= window_start)
  # Mocked field-center coordinates (would come from geometry in production)
  lat <- -16.1
  lon <- 34.7
  # Weather series covering the same window
  weather_data <- get_weather_data(window_start, window_end, lat, lon)
  # Daily mean CI across subfields
  daily_ci <- recent_obs |>
    dplyr::group_by(Date) |>
    dplyr::summarize(mean_ci = mean(value, na.rm = TRUE))
  # Dual-axis chart: rainfall bars on the primary axis, CI (scaled x10) as a
  # line/points, and the daily temperature range as a translucent ribbon
  ggplot() +
    geom_col(data = weather_data, aes(x = date, y = rainfall_mm),
             fill = "#1565C0", alpha = 0.7, width = 0.7) +
    geom_line(data = daily_ci, aes(x = Date, y = mean_ci * 10),
              color = "#2E7D32", size = 1) +
    geom_point(data = daily_ci, aes(x = Date, y = mean_ci * 10),
               color = "#2E7D32", size = 2) +
    geom_ribbon(data = weather_data,
                aes(x = date, ymin = temp_min_c, ymax = temp_max_c),
                fill = "#FF9800", alpha = 0.2) +
    scale_y_continuous(
      name = "Rainfall (mm)",
      sec.axis = sec_axis(~./10, name = "Chlorophyll Index & Temperature (°C)")
    ) +
    labs(
      title = paste("Field", pivotName, "- Weather and CI Relationship"),
      subtitle = paste("Last", days_to_show, "days"),
      x = "Date"
    ) +
    theme_minimal() +
    theme(
      axis.title.y.left = element_text(color = "#1565C0"),
      axis.title.y.right = element_text(color = "#2E7D32"),
      legend.position = "bottom"
    )
}

View file

@ -0,0 +1,421 @@
# Optimal CI Analysis - Day 30 CI values with quadratic fitting
# Author: SmartCane Analysis Team
# Date: 2025-06-12
# Load required libraries
library(ggplot2)
library(dplyr)
library(readr)
# Set file path
# NOTE(review): absolute, user-specific Windows path - consider here::here()
# or a config file so the script is portable.
rds_file_path <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/laravel_app/storage/app/chemba/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
# Check if file exists
if (!file.exists(rds_file_path)) {
  stop("RDS file not found at specified path: ", rds_file_path)
}
# Load the data
cat("Loading RDS file...\n")
ci_data <- readRDS(rds_file_path)
# Display structure of the data to understand it better
cat("Data structure:\n")
str(ci_data)
cat("\nFirst few rows:\n")
head(ci_data)
cat("\nColumn names:\n")
print(colnames(ci_data))
# Filter data based on requirements
cat("\nApplying data filters...\n")
# 1. Filter out models that don't reach at least DOY 300
#    (short series cannot support a full-season curve fit)
model_doy_max <- ci_data %>%
  group_by(model) %>%
  summarise(max_doy = max(DOY, na.rm = TRUE), .groups = 'drop')
valid_models <- model_doy_max %>%
  filter(max_doy >= 300) %>%
  pull(model)
cat(paste("Models before filtering:", n_distinct(ci_data$model), "\n"))
cat(paste("Models reaching at least DOY 300:", length(valid_models), "\n"))
ci_data <- ci_data %>%
  filter(model %in% valid_models)
# 2. Apply IQR filtering per DOY: drop FitData values outside the
#    [Q1 - 1.5*IQR, Q3 + 1.5*IQR] fences computed within each DOY group
cat("Applying IQR filtering per DOY...\n")
original_rows <- nrow(ci_data)
ci_data <- ci_data %>%
  group_by(DOY) %>%
  mutate(
    Q1 = quantile(FitData, 0.25, na.rm = TRUE),
    Q3 = quantile(FitData, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower_bound = Q1 - 1.5 * IQR,
    upper_bound = Q3 + 1.5 * IQR
  ) %>%
  filter(FitData >= lower_bound & FitData <= upper_bound) %>%
  select(-Q1, -Q3, -IQR, -lower_bound, -upper_bound) %>%
  ungroup()
filtered_rows <- nrow(ci_data)
cat(paste("Rows before IQR filtering:", original_rows, "\n"))
cat(paste("Rows after IQR filtering:", filtered_rows, "\n"))
cat(paste("Removed", original_rows - filtered_rows, "outliers (",
          round(100 * (original_rows - filtered_rows) / original_rows, 1), "%)\n"))
# Check what day values are available after filtering
if ("DOY" %in% colnames(ci_data)) {
  cat("\nUnique DOY values after filtering:\n")
  print(sort(unique(ci_data$DOY)))
} else {
  cat("\nNo 'DOY' column found. Available columns:\n")
  print(colnames(ci_data))
}
# Extract CI values at day 30 for each field
cat("\nExtracting day 30 CI values...\n")
# Set column names based on known structure; field_col may be overridden below
day_col <- "DOY"
ci_col <- "FitData"
field_col <- "model"
# Try different possible field column name combinations
if ("field" %in% colnames(ci_data)) {
  field_col <- "field"
} else if ("Field" %in% colnames(ci_data)) {
  field_col <- "Field"
} else if ("pivot" %in% colnames(ci_data)) {
  field_col <- "pivot"
} else if ("Pivot" %in% colnames(ci_data)) {
  field_col <- "Pivot"
} else if ("field_id" %in% colnames(ci_data)) {
  field_col <- "field_id"
} else if ("Field_ID" %in% colnames(ci_data)) {
  field_col <- "Field_ID"
}
# Check if we found the required columns
if (!("DOY" %in% colnames(ci_data))) {
  stop("DOY column not found in data")
}
if (!("FitData" %in% colnames(ci_data))) {
  stop("FitData column not found in data")
}
# NOTE(review): field_col is initialized to "model" above, so it is never NULL
# and this fallback branch is dead code.
if (is.null(field_col)) {
  cat("Could not automatically identify field column. Please check column names.\n")
  cat("Available columns: ", paste(colnames(ci_data), collapse = ", "), "\n")
  stop("Manual field column identification required")
}
cat(paste("Using columns - DOY:", day_col, "Field:", field_col, "FitData:", ci_col, "\n"))
# Extract day 30 data
# NOTE(review): despite the name, this keeps every DOY that is a multiple of
# 30 (30, 60, 90, ...), not only day 30; later code fits curves across the
# whole season using these points.
day_30_data <- ci_data %>%
  filter(DOY %% 30 == 0) %>%
  select(field = !!sym(field_col), ci = !!sym(ci_col), DOY) %>%
  na.omit() %>%
  arrange(field)
cat(paste("Found", nrow(day_30_data), "fields with day 30 CI values\n"))
if (nrow(day_30_data) == 0) {
  stop("No data found for day 30. Check if day 30 exists in the dataset.")
}
# Display summary of day 30 data
cat("\nSummary of day 30 CI values:\n")
print(summary(day_30_data))
# Add field index for plotting (assuming fields represent some spatial or sequential order)
#day_30_data$field_index <- 1:nrow(day_30_data)
# Scatter of all sampled CI values against DOY
p1 <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
  geom_point(color = "blue", size = 3, alpha = 0.7) +
  labs(
    title = "CI Values at Day 30 for All Fields",
    x = "Day of Year (DOY)",
    y = "CI Value",
    subtitle = paste("Total fields:", nrow(day_30_data))
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
print(p1)
# Try multiple curve fitting approaches
cat("\nTrying different curve fitting approaches...\n")
# Aggregate data by DOY (take mean CI for each DOY to reduce noise)
aggregated_data <- day_30_data %>%
  group_by(DOY) %>%
  summarise(
    mean_ci = mean(ci, na.rm = TRUE),
    median_ci = median(ci, na.rm = TRUE),
    count = n(),
    .groups = 'drop'
  ) %>%
  filter(count >= 5) # Only keep DOY values with sufficient data points
cat(paste("Aggregated to", nrow(aggregated_data), "DOY points with sufficient data\n"))
# Create prediction data for smooth curves
x_smooth <- seq(min(aggregated_data$DOY), max(aggregated_data$DOY), length.out = 100)
# 1. Quadratic model (as requested)
cat("\n1. Fitting quadratic model...\n")
quad_model <- lm(mean_ci ~ poly(DOY, 2, raw = TRUE), data = aggregated_data)
quad_pred <- predict(quad_model, newdata = data.frame(DOY = x_smooth))
quad_r2 <- summary(quad_model)$r.squared
cat(paste("Quadratic R² =", round(quad_r2, 3), "\n"))
# 2. Logistic growth model: y = K / (1 + exp(-r*(x-x0))) - biologically realistic
# FIX: results are initialized BEFORE the tryCatch. Assignments made inside a
# tryCatch error handler are local to the handler function, so on a
# convergence failure these objects were previously left undefined.
logistic_model <- NULL
logistic_pred <- NULL
logistic_r2 <- NA
cat("\n2. Fitting logistic growth model...\n")
tryCatch({
  # Estimate starting values
  K_start <- max(aggregated_data$mean_ci) * 1.1 # Carrying capacity
  r_start <- 0.05 # Growth rate
  x0_start <- mean(aggregated_data$DOY) # Inflection point
  logistic_model <- nls(mean_ci ~ K / (1 + exp(-r * (DOY - x0))),
                        data = aggregated_data,
                        start = list(K = K_start, r = r_start, x0 = x0_start),
                        control = nls.control(maxiter = 1000))
  logistic_pred <- predict(logistic_model, newdata = data.frame(DOY = x_smooth))
  # Pseudo-R² for nls: 1 - SSE / SST
  logistic_r2 <- 1 - sum(residuals(logistic_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
  cat(paste("Logistic growth R² =", round(logistic_r2, 3), "\n"))
}, error = function(e) {
  cat("Logistic growth model failed to converge\n")
})
# 3. Beta function model: good for crop growth curves with clear peak
# (pre-initialized for the same tryCatch-handler-scope reason as above)
beta_model <- NULL
beta_pred <- NULL
beta_r2 <- NA
cat("\n3. Fitting Beta function model...\n")
tryCatch({
  # Normalize DOY to 0-1 range for Beta function
  doy_min <- min(aggregated_data$DOY)
  doy_max <- max(aggregated_data$DOY)
  aggregated_data$doy_norm <- (aggregated_data$DOY - doy_min) / (doy_max - doy_min)
  # Beta function: y = a * (x^(p-1)) * ((1-x)^(q-1)) + c
  beta_model <- nls(mean_ci ~ a * (doy_norm^(p-1)) * ((1-doy_norm)^(q-1)) + c,
                    data = aggregated_data,
                    start = list(a = max(aggregated_data$mean_ci) * 20, p = 2, q = 3, c = min(aggregated_data$mean_ci)),
                    control = nls.control(maxiter = 1000))
  # Predict on normalized scale then convert back
  x_smooth_norm <- (x_smooth - doy_min) / (doy_max - doy_min)
  beta_pred <- predict(beta_model, newdata = data.frame(doy_norm = x_smooth_norm))
  beta_r2 <- 1 - sum(residuals(beta_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
  cat(paste("Beta function R² =", round(beta_r2, 3), "\n"))
}, error = function(e) {
  cat("Beta function model failed to converge\n")
})
# 4. Gaussian (normal) curve: good for symmetric growth patterns
# (pre-initialized for the same tryCatch-handler-scope reason as above)
gaussian_model <- NULL
gaussian_pred <- NULL
gaussian_r2 <- NA
cat("\n4. Fitting Gaussian curve...\n")
tryCatch({
  # Gaussian: y = a * exp(-((x-mu)^2)/(2*sigma^2)) + c
  gaussian_model <- nls(mean_ci ~ a * exp(-((DOY - mu)^2)/(2 * sigma^2)) + c,
                        data = aggregated_data,
                        start = list(a = max(aggregated_data$mean_ci),
                                     mu = aggregated_data$DOY[which.max(aggregated_data$mean_ci)],
                                     sigma = 50,
                                     c = min(aggregated_data$mean_ci)),
                        control = nls.control(maxiter = 1000))
  gaussian_pred <- predict(gaussian_model, newdata = data.frame(DOY = x_smooth))
  gaussian_r2 <- 1 - sum(residuals(gaussian_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
  cat(paste("Gaussian R² =", round(gaussian_r2, 3), "\n"))
}, error = function(e) {
  cat("Gaussian model failed to converge\n")
})
# 5. LOESS (local regression) - for comparison
cat("\n5. Fitting LOESS smoothing...\n")
loess_model <- loess(mean_ci ~ DOY, data = aggregated_data, span = 0.5)
loess_pred <- predict(loess_model, newdata = data.frame(DOY = x_smooth))
loess_r2 <- 1 - sum(residuals(loess_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
cat(paste("LOESS R² =", round(loess_r2, 3), "\n"))
# Calculate confidence intervals for both models
cat("\nCalculating confidence intervals...\n")
#' Compute confidence-interval bounds for model predictions
#'
#' @param model A fitted model; "lm" objects get exact parametric intervals,
#'   "loess" (and any other class) gets a residual-based normal approximation.
#' @param data Unused; kept for call-site compatibility.
#' @param newdata Data frame of predictor values at which to predict.
#' @param alpha Two-sided miss probability; the default 0.5 yields a 50% CI.
#' @return A list with numeric vectors `lower` and `upper`.
calculate_ci <- function(model, data, newdata, alpha = 0.5) {
  # Point predictions at the requested locations
  pred_vals <- predict(model, newdata = newdata)
  # lm supports exact parametric confidence intervals directly
  if (inherits(model, "lm")) {
    pred_intervals <- predict(model, newdata = newdata, interval = "confidence", level = 1 - alpha)
    return(list(
      lower = pred_intervals[, "lwr"],
      upper = pred_intervals[, "upr"]
    ))
  }
  # LOESS: normal approximation around the fit using the residual standard
  # deviation; the half-width is the (1 - alpha/2) normal quantile times sd
  if (inherits(model, "loess")) {
    residual_sd <- sd(residuals(model), na.rm = TRUE)
    margin <- qnorm(1 - alpha/2) * residual_sd
    return(list(
      lower = pred_vals - margin,
      upper = pred_vals + margin
    ))
  }
  # Fallback for any other model class: same residual-based approximation
  residual_sd <- sd(residuals(model), na.rm = TRUE)
  margin <- qnorm(1 - alpha/2) * residual_sd
  list(
    lower = pred_vals - margin,
    upper = pred_vals + margin
  )
}
# Calculate CIs for quadratic model using aggregated data (same as model fitting)
quad_ci <- calculate_ci(quad_model, aggregated_data, data.frame(DOY = x_smooth))
# Calculate CIs for LOESS model using aggregated data (same as model fitting)
loess_ci <- calculate_ci(loess_model, aggregated_data, data.frame(DOY = x_smooth))
# Create separate plots for LOESS and Quadratic models
cat("\nCreating LOESS plot with confidence intervals...\n")
# LOESS plot: raw field-level points (light), per-DOY means (dark),
# 50% CI ribbon, and the fitted LOESS curve on top
p_loess <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
  geom_point(color = "lightblue", size = 1.5, alpha = 0.4) +
  geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
             color = "darkblue", size = 3, alpha = 0.8) +
  geom_ribbon(data = data.frame(DOY = x_smooth,
                                lower = loess_ci$lower,
                                upper = loess_ci$upper),
              aes(x = DOY, ymin = lower, ymax = upper),
              alpha = 0.3, fill = "purple", inherit.aes = FALSE) +
  geom_line(data = data.frame(DOY = x_smooth, loess = loess_pred),
            aes(x = DOY, y = loess),
            color = "purple", size = 1.5) +
  labs(
    title = "LOESS Model - CI Values Over Growing Season",
    x = "Day of Year (DOY)",
    y = "CI Value",
    subtitle = paste("LOESS R² =", round(loess_r2, 3), "| 50% Confidence Interval")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
# Find optimal point for LOESS: maximum of the fitted curve on the x_smooth grid
loess_max_idx <- which.max(loess_pred)
loess_optimal_doy <- x_smooth[loess_max_idx]
loess_optimal_ci <- loess_pred[loess_max_idx]
# NOTE(review): this geom_point inherits day_30_data from the base ggplot call,
# so the star marker is drawn once per row (overplotted at one location);
# annotate() would draw it a single time.
p_loess <- p_loess +
  geom_point(aes(x = loess_optimal_doy, y = loess_optimal_ci),
             color = "red", size = 5, shape = 8) +
  annotate("text",
           x = loess_optimal_doy + 30,
           y = loess_optimal_ci,
           label = paste("Optimal: DOY", round(loess_optimal_doy, 1), "\nCI =", round(loess_optimal_ci, 3)),
           color = "red", size = 4, fontface = "bold")
print(p_loess)
cat("\nCreating Quadratic plot with confidence intervals...\n")
# Quadratic plot: same layering as the LOESS plot, red/coral palette
p_quadratic <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
  geom_point(color = "lightcoral", size = 1.5, alpha = 0.4) +
  geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
             color = "darkred", size = 3, alpha = 0.8) +
  geom_ribbon(data = data.frame(DOY = x_smooth,
                                lower = quad_ci$lower,
                                upper = quad_ci$upper),
              aes(x = DOY, ymin = lower, ymax = upper),
              alpha = 0.3, fill = "red", inherit.aes = FALSE) +
  geom_line(data = data.frame(DOY = x_smooth, quadratic = quad_pred),
            aes(x = DOY, y = quadratic),
            color = "red", size = 1.5) +
  labs(
    title = "Quadratic Model - CI Values Over Growing Season",
    x = "Day of Year (DOY)",
    y = "CI Value",
    subtitle = paste("Quadratic R² =", round(quad_r2, 3), "| 50% Confidence Interval")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
# Find optimal point for Quadratic: vertex of y = c + b*x + a*x^2 at x = -b/(2a)
# (coef order from poly(..., raw = TRUE): intercept, linear, quadratic)
quad_coeffs <- coef(quad_model)
a <- quad_coeffs[3]
b <- quad_coeffs[2]
if(a != 0) {
  quad_optimal_doy <- -b / (2*a)
  doy_range <- range(aggregated_data$DOY)
  # Use the analytic vertex only if it falls inside the observed DOY range;
  # otherwise fall back to the maximum of the predicted curve
  if(quad_optimal_doy >= doy_range[1] && quad_optimal_doy <= doy_range[2]) {
    quad_optimal_ci <- predict(quad_model, newdata = data.frame(DOY = quad_optimal_doy))
  } else {
    quad_optimal_doy <- x_smooth[which.max(quad_pred)]
    quad_optimal_ci <- max(quad_pred)
  }
} else {
  # Degenerate (effectively linear) fit: take the maximum of the predicted curve
  quad_optimal_doy <- x_smooth[which.max(quad_pred)]
  quad_optimal_ci <- max(quad_pred)
}
# NOTE(review): as with the LOESS plot, this marker inherits day_30_data and is
# drawn once per row; annotate() would be cleaner.
p_quadratic <- p_quadratic +
  geom_point(aes(x = quad_optimal_doy, y = quad_optimal_ci),
             color = "darkred", size = 5, shape = 8) +
  annotate("text",
           x = quad_optimal_doy + 30,
           y = quad_optimal_ci,
           label = paste("Optimal: DOY", round(quad_optimal_doy, 1), "\nCI =", round(quad_optimal_ci, 3)),
           color = "darkred", size = 4, fontface = "bold")
print(p_quadratic)
# NOTE(review): p_loess was already printed earlier; this second print
# re-displays it so both model plots appear together at the end of the run.
print(p_loess)
# Print results summary
cat("\n=== RESULTS SUMMARY ===\n")
cat(paste("LOESS Model - R² =", round(loess_r2, 3), "\n"))
cat(paste(" Optimal DOY:", round(loess_optimal_doy, 1), "\n"))
cat(paste(" Optimal CI:", round(loess_optimal_ci, 4), "\n\n"))
cat(paste("Quadratic Model - R² =", round(quad_r2, 3), "\n"))
cat(paste(" Optimal DOY:", round(quad_optimal_doy, 1), "\n"))
cat(paste(" Optimal CI:", round(quad_optimal_ci, 4), "\n"))

View file

@ -1,15 +1,40 @@
install.packages('CAST') # Install required packages for SmartCane project
install.packages("packages/CIprep_0.1.4.tar.gz",repos=NULL, type="source") # This script installs all packages needed to run the CI report dashboard
install.packages('caret')
install.packages('exactextractr') # List of required packages
install.packages('googledrive') required_packages <- c(
install.packages('here') # Core packages
install.packages('lubridate') "here", "tidyverse", "sf", "terra", "tmap", "lubridate",
install.packages('raster')
install.packages('readxl') # Additional data manipulation
install.packages('rsample') "zoo", "readxl", "knitr", "rmarkdown", "dplyr", "purrr", "stringr",
install.packages('sf')
install.packages('terra') # Spatial analysis
install.packages('tidyverse') "exactextractr",
install.packages('tmap')
install.packages('zoo') # Machine learning and statistics
"rsample", "caret", "randomForest", "CAST"
)
# Install a CRAN package unless it is already available in the library
install_if_missing <- function(pkg) {
  already_present <- requireNamespace(pkg, quietly = TRUE)
  if (already_present) {
    message(paste("Package already installed:", pkg))
  } else {
    message(paste("Installing package:", pkg))
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}
# Install missing packages
for (pkg in required_packages) {
install_if_missing(pkg)
}
# Load core packages to verify installation
library(here)
library(tidyverse)
library(sf)
library(terra)
message("All required packages have been installed!")

View file

@ -32,15 +32,20 @@ main <- function() {
message("No project_dir provided. Using default:", project_dir) message("No project_dir provided. Using default:", project_dir)
} }
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# Initialize project configuration and load utility functions # Initialize project configuration and load utility functions
tryCatch({ tryCatch({
source("parameters_project.R") source("parameters_project.R")
source("ci_extraction_utils.R") source("growth_model_utils.R")
}, error = function(e) { }, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.") warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source(here::here("r_app", "parameters_project.R"))
source("r_app/ci_extraction_utils.R") source(here::here("r_app", "growth_model_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) { }, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.") stop("Failed to source required files from both default and 'r_app' directories.")
}) })
@ -92,5 +97,6 @@ main <- function() {
}) })
} }
# Run the main function if the script is executed directly if (sys.nframe() == 0) {
main() main()
}

View file

@ -28,19 +28,31 @@ main <- function() {
# Capture command line arguments # Capture command line arguments
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
# Process project_dir argument with default
if (length(args) >= 3 && !is.na(args[3])) {
project_dir <- as.character(args[3])
} else {
# Default project directory
project_dir <- "chemba"
message("No project_dir provided. Using default:", project_dir)
}
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# Process end_date argument with default # Process end_date argument with default
if (length(args) >= 1 && !is.na(args[1])) { if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1]) end_date <- as.Date(args[1])
if (is.na(end_date)) { if (is.na(end_date)) {
message("Invalid end_date provided. Using current date.") message("Invalid end_date provided. Using current date.")
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" # Default date for testing #end_date <- "2024-08-25" # Default date for testing
} }
} else { } else {
# Default to current date if no argument is provided # Default to current date if no argument is provided
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" # Default date for testing #end_date <- "2024-08-25" # Default date for testing
message("No end_date provided. Using current date:", format(end_date)) message("No end_date provided. Using current date: ", format(end_date))
} }
# Process offset argument with default # Process offset argument with default
@ -56,25 +68,20 @@ main <- function() {
message("No offset provided. Using default:", offset, "days") message("No offset provided. Using default:", offset, "days")
} }
# Process project_dir argument with default
if (length(args) >= 3 && !is.na(args[3])) {
project_dir <- as.character(args[3])
} else {
# Default project directory
project_dir <- "chemba"
message("No project_dir provided. Using default:", project_dir)
}
# 3. Initialize project configuration # 3. Initialize project configuration
# -------------------------------- # --------------------------------
tryCatch({ tryCatch({
source("parameters_project.R") source("parameters_project.R")
source("ci_extraction_utils.R") source("mosaic_creation_utils.R")
safe_log(paste("Successfully sourced files from default directory."))
}, error = function(e) { }, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.") warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source(here::here("r_app", "parameters_project.R"))
source("r_app/ci_extraction_utils.R") source(here::here("r_app", "mosaic_creation_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) { }, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.") stop("Failed to source required files from both default and 'r_app' directories.")
}) })
@ -82,8 +89,8 @@ main <- function() {
# 4. Generate date range for processing # 4. Generate date range for processing
# --------------------------------- # ---------------------------------
dates <- date_list(end_date, offset) dates <- date_list(end_date, offset)
log_message(paste("Processing data for week", dates$week, "of", dates$year)) safe_log(paste("Processing data for week", dates$week, "of", dates$year))
# Create output filename # Create output filename
file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) { file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
@ -92,7 +99,7 @@ main <- function() {
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif") paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
} }
log_message(paste("Output will be saved as:", file_name_tif)) safe_log(paste("Output will be saved as:", file_name_tif))
# 5. Create weekly mosaic using the function from utils # 5. Create weekly mosaic using the function from utils
# ------------------------------------------------- # -------------------------------------------------
@ -107,5 +114,6 @@ main <- function() {
) )
} }
# Run the main function if the script is executed directly if (sys.nframe() == 0) {
main() main()
}

View file

@ -3,8 +3,7 @@
# Utility functions for creating weekly mosaics from daily satellite imagery. # Utility functions for creating weekly mosaics from daily satellite imagery.
# These functions support cloud cover assessment, date handling, and mosaic creation. # These functions support cloud cover assessment, date handling, and mosaic creation.
#' Safe logging function that works whether log_message exists or not #' Safe logging function
#'
#' @param message The message to log #' @param message The message to log
#' @param level The log level (default: "INFO") #' @param level The log level (default: "INFO")
#' @return NULL (used for side effects) #' @return NULL (used for side effects)
@ -158,34 +157,46 @@ count_cloud_coverage <- function(vrt_list, field_boundaries) {
tryCatch({ tryCatch({
# Calculate total pixel area using the first VRT file # Calculate total pixel area using the first VRT file
total_pix_area <- terra::rast(vrt_list[1]) %>% total_pix_area <- terra::rast(vrt_list[1]) |>
terra::subset(1) %>% terra::subset(1) |>
terra::setValues(1) %>% terra::setValues(1) |>
terra::crop(field_boundaries, mask = TRUE) %>% terra::crop(field_boundaries, mask = TRUE) |>
terra::global(fun = "notNA") terra::global(fun = "notNA")
# Extract layer 1 from all rasters (for cloud detection) # Process each raster to detect clouds and shadows
layer_5_list <- purrr::map(vrt_list, function(file) { processed_rasters <- list()
terra::rast(file) %>% terra::subset(1) cloud_masks <- list()
}) %>% terra::rast()
# Create data frame for missing pixels count
missing_pixels_df <- data.frame(
filename = vrt_list,
notNA = numeric(length(vrt_list)),
total_pixels = numeric(length(vrt_list)),
missing_pixels_percentage = numeric(length(vrt_list)),
thres_5perc = numeric(length(vrt_list)),
thres_40perc = numeric(length(vrt_list))
)
# Fill in the data frame with missing pixel statistics
for (i in seq_along(processed_rasters)) {
notna_count <- terra::global(processed_rasters[[i]][[1]], fun = "notNA")$notNA
missing_pixels_df$notNA[i] <- notna_count
missing_pixels_df$total_pixels[i] <- total_pix_area$notNA
missing_pixels_df$missing_pixels_percentage[i] <- round(100 - ((notna_count / total_pix_area$notNA) * 100))
missing_pixels_df$thres_5perc[i] <- as.integer(missing_pixels_df$missing_pixels_percentage[i] < 5)
missing_pixels_df$thres_40perc[i] <- as.integer(missing_pixels_df$missing_pixels_percentage[i] < 45)
}
# Calculate percentage of missing pixels (clouds) # Store processed rasters and cloud masks as attributes
missing_pixels_count <- terra::global(layer_5_list, fun = "notNA") %>% attr(missing_pixels_df, "cloud_masks") <- cloud_masks
dplyr::mutate( attr(missing_pixels_df, "processed_rasters") <- processed_rasters
total_pixels = total_pix_area$notNA,
missing_pixels_percentage = round(100 - ((notNA / total_pix_area$notNA) * 100)),
thres_5perc = as.integer(missing_pixels_percentage < 5),
thres_40perc = as.integer(missing_pixels_percentage < 45)
)
# Log results # Log results
safe_log(paste( safe_log(paste(
"Cloud cover assessment completed for", length(vrt_list), "files.", "Cloud cover assessment completed for", length(vrt_list), "files.",
sum(missing_pixels_count$thres_5perc), "files with <5% cloud cover,", sum(missing_pixels_df$thres_5perc), "files with <5% cloud cover,",
sum(missing_pixels_count$thres_40perc), "files with <45% cloud cover" sum(missing_pixels_df$thres_40perc), "files with <45% cloud cover"
)) ))
return(missing_pixels_df)
return(missing_pixels_count)
}, error = function(e) { }, error = function(e) {
warning("Error in cloud coverage calculation: ", e$message) warning("Error in cloud coverage calculation: ", e$message)
return(NULL) return(NULL)
@ -209,8 +220,8 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
safe_log("No images available for this period, creating empty mosaic", "WARNING") safe_log("No images available for this period, creating empty mosaic", "WARNING")
x <- terra::rast(raster_files_final[1]) %>% x <- terra::rast(raster_files_final[1]) |>
terra::setValues(0) %>% terra::setValues(0) |>
terra::crop(field_boundaries, mask = TRUE) terra::crop(field_boundaries, mask = TRUE)
names(x) <- c("Red", "Green", "Blue", "NIR", "CI") names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
@ -227,40 +238,111 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
return(x) return(x)
} }
# Determine best rasters to use based on cloud coverage # Check if we have processed rasters from cloud detection
index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc)) processed_rasters <- attr(missing_pixels_count, "processed_rasters")
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc)) cloud_masks <- attr(missing_pixels_count, "cloud_masks")
# Create mosaic based on available cloud-free images if (!is.null(processed_rasters) && length(processed_rasters) > 0) {
if (sum(missing_pixels_count$thres_5perc) > 1) { safe_log("Using cloud-masked rasters for mosaic creation")
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
cloudy_rasters_list <- vrt_list[index_5perc] # Determine best rasters to use based on cloud coverage
rsrc <- terra::sprc(cloudy_rasters_list) index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
x <- terra::mosaic(rsrc, fun = "max") index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
safe_log("Using single cloud-free image (<5% clouds)")
x <- terra::rast(vrt_list[index_5perc[1]])
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
cloudy_rasters_list <- vrt_list[index_40perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
x <- terra::rast(vrt_list[index_40perc[1]])
# Create mosaic based on available cloud-free images
if (sum(missing_pixels_count$thres_5perc) > 1) {
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
# Use the cloud-masked rasters instead of original files
cloudy_rasters_list <- processed_rasters[index_5perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
# Also create a composite mask showing where data is valid
mask_list <- cloud_masks[index_5perc]
mask_rsrc <- terra::sprc(mask_list)
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
attr(x, "cloud_mask") <- mask_composite
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
safe_log("Using single cloud-free image (<5% clouds)")
# Use the cloud-masked raster
x <- processed_rasters[[index_5perc[1]]]
attr(x, "cloud_mask") <- cloud_masks[[index_5perc[1]]]
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
# Use the cloud-masked rasters
cloudy_rasters_list <- processed_rasters[index_40perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
# Also create a composite mask
mask_list <- cloud_masks[index_40perc]
mask_rsrc <- terra::sprc(mask_list)
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
attr(x, "cloud_mask") <- mask_composite
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
# Use the cloud-masked raster
x <- processed_rasters[[index_40perc[1]]]
attr(x, "cloud_mask") <- cloud_masks[[index_40perc[1]]]
} else {
safe_log("No cloud-free images available, using all cloud-masked images", "WARNING")
# Use all cloud-masked rasters
rsrc <- terra::sprc(processed_rasters)
x <- terra::mosaic(rsrc, fun = "max")
# Also create a composite mask
mask_rsrc <- terra::sprc(cloud_masks)
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
attr(x, "cloud_mask") <- mask_composite
}
} else { } else {
safe_log("No cloud-free images available, using all images", "WARNING") # Fall back to original behavior if no cloud-masked rasters available
safe_log("No cloud-masked rasters available, using original images", "WARNING")
rsrc <- terra::sprc(vrt_list) # Determine best rasters to use based on cloud coverage
x <- terra::mosaic(rsrc, fun = "max") index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
# Create mosaic based on available cloud-free images
if (sum(missing_pixels_count$thres_5perc) > 1) {
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
cloudy_rasters_list <- vrt_list[index_5perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
safe_log("Using single cloud-free image (<5% clouds)")
x <- terra::rast(vrt_list[index_5perc[1]])
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
cloudy_rasters_list <- vrt_list[index_40perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
x <- terra::rast(vrt_list[index_40perc[1]])
} else {
safe_log("No cloud-free images available, using all images", "WARNING")
rsrc <- terra::sprc(vrt_list)
x <- terra::mosaic(rsrc, fun = "max")
}
} }
# Set consistent layer names # Set consistent layer names
@ -288,18 +370,51 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS
# Create full file path # Create full file path
file_path <- here::here(output_dir, file_name) file_path <- here::here(output_dir, file_name)
# Get cloud mask if it exists
cloud_mask <- attr(mosaic_raster, "cloud_mask")
# Save raster # Save raster
terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE) terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE)
# Save cloud mask if available
if (!is.null(cloud_mask)) {
# Create mask filename by adding _mask before extension
mask_file_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
mask_file_path <- here::here(output_dir, mask_file_name)
# Save the mask
terra::writeRaster(cloud_mask, mask_file_path, overwrite = TRUE)
safe_log(paste("Cloud/shadow mask saved to:", mask_file_path))
}
# Create plots if requested # Create plots if requested
if (plot_result) { if (plot_result) {
# Plot the CI band
if ("CI" %in% names(mosaic_raster)) { if ("CI" %in% names(mosaic_raster)) {
terra::plot(mosaic_raster$CI, main = paste("CI map", file_name)) terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
} }
# Plot RGB image
if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) { if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) {
terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name)) terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
} }
# Plot cloud mask if available
if (!is.null(cloud_mask)) {
terra::plot(cloud_mask, main = paste("Cloud/shadow mask", file_name),
col = c("red", "green"))
}
# If we have both RGB and cloud mask, create a side-by-side comparison
if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster)) && !is.null(cloud_mask)) {
old_par <- par(mfrow = c(1, 2))
terra::plotRGB(mosaic_raster, main = "RGB Image")
# Create a colored mask for visualization (red = cloud/shadow, green = clear)
mask_plot <- cloud_mask
terra::plot(mask_plot, main = "Cloud/Shadow Mask", col = c("red", "green"))
par(old_par)
}
} }
# Log save completion # Log save completion

View file

@ -55,10 +55,9 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
#' @param week Week number to display in the title #' @param week Week number to display in the title
#' @param age Age of the crop in weeks #' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: FALSE) #' @param borders Whether to display field borders (default: FALSE)
#' @param use_breaks Whether to use breaks or continuous spectrum for the raster (default: TRUE)
#' @return A tmap object with the CI map #' @return A tmap object with the CI map
#' #'
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, use_breaks = TRUE){ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE){
# Input validation # Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) { if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required") stop("pivot_raster is required")
@ -75,34 +74,17 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
if (missing(age) || is.null(age)) { if (missing(age) || is.null(age)) {
stop("age parameter is required") stop("age parameter is required")
} }
# Create the base map
# Create the base map map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (fixed scale 1-8 for consistent comparison)
map <- tm_shape(pivot_raster, unit = "m") map <- map + tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
limits = c(1, 8)),
# Add raster with either breaks or continuous spectrum based on parameter col.legend = tm_legend(title = "CI",
if (use_breaks) { orientation = if(legend_is_portrait) "portrait" else "landscape",
map <- map + tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf), show = show_legend,
palette = "RdYlGn", position = c("left", "bottom")))
legend.is.portrait = legend_is_portrait, # Add layout elements
midpoint = NA,
title = "CI")
} else {
map <- map + tm_raster(palette = "RdYlGn",
legend.is.portrait = legend_is_portrait,
style = "cont", # Use continuous spectrum
title = "CI")
}
# Add layout elements
map <- map + tm_layout(main.title = paste0("Max CI week ", week,"\n", age, " weeks old"), map <- map + tm_layout(main.title = paste0("Max CI week ", week,"\n", age, " weeks old"),
main.title.size = 0.7, main.title.size = 0.7)
legend.show = show_legend,
legend.position = c("left", "bottom"),
# legend.width = 0.5,
# legend.height = 0.5,
# legend.text.size = 0.8,
# legend.title.size = 0.9,
legend.outside = FALSE)
# Add borders if requested # Add borders if requested
if (borders) { if (borders) {
@ -128,10 +110,9 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
#' @param week_2 Second week number for comparison #' @param week_2 Second week number for comparison
#' @param age Age of the crop in weeks #' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: TRUE) #' @param borders Whether to display field borders (default: TRUE)
#' @param use_breaks Whether to use breaks or continuous spectrum for the raster (default: TRUE)
#' @return A tmap object with the CI difference map #' @return A tmap object with the CI difference map
#' #'
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, use_breaks = TRUE){ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE){
# Input validation # Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) { if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required") stop("pivot_raster is required")
@ -148,35 +129,18 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
if (missing(age) || is.null(age)) { if (missing(age) || is.null(age)) {
stop("age parameter is required") stop("age parameter is required")
} }
# Create the base map
# Create the base map map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale)
map <- tm_shape(pivot_raster, unit = "m") map <- map + tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
midpoint = 0,
# Add raster with either breaks or continuous spectrum based on parameter limits = c(-3, 3)),
if (use_breaks) { col.legend = tm_legend(title = "CI difference",
map <- map + tm_raster(breaks = c(-3,-2,-1,0,1,2,3), orientation = if(legend_is_portrait) "portrait" else "landscape",
palette = "RdYlGn", show = show_legend,
legend.is.portrait = legend_is_portrait, position = c("left", "bottom")))
midpoint = 0, # Add layout elements
title = "CI difference")
} else {
map <- map + tm_raster(palette = "RdYlGn",
legend.is.portrait = legend_is_portrait,
style = "cont", # Use continuous spectrum
midpoint = 0,
title = "CI difference")
}
# Add layout elements
map <- map + tm_layout(main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks old"), map <- map + tm_layout(main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks old"),
main.title.size = 0.7, main.title.size = 0.7)
legend.show = show_legend,
legend.position = c("left", "bottom"),
# legend.width = 0.5,
# legend.height = 0.5,
# legend.text.size = 0.8,
# legend.title.size = 0.9,
legend.outside = FALSE)
# Add borders if requested # Add borders if requested
if (borders) { if (borders) {
@ -205,7 +169,6 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
#' @param week_minus_1 Previous week number #' @param week_minus_1 Previous week number
#' @param week_minus_2 Two weeks ago week number #' @param week_minus_2 Two weeks ago week number
#' @param week_minus_3 Three weeks ago week number #' @param week_minus_3 Three weeks ago week number
#' @param use_breaks Whether to use discrete breaks or continuous spectrum (default: TRUE)
#' @param borders Whether to display field borders (default: TRUE) #' @param borders Whether to display field borders (default: TRUE)
#' @return NULL (adds output directly to R Markdown document) #' @return NULL (adds output directly to R Markdown document)
#' #'
@ -221,7 +184,6 @@ ci_plot <- function(pivotName,
week_minus_1 = week_minus_1, week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2, week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3, week_minus_3 = week_minus_3,
use_breaks = TRUE,
borders = TRUE){ borders = TRUE){
# Input validation # Input validation
if (missing(pivotName) || is.null(pivotName) || pivotName == "") { if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
@ -282,35 +244,28 @@ ci_plot <- function(pivotName,
# Create spans for borders # Create spans for borders
joined_spans2 <- field_boundaries %>% joined_spans2 <- field_boundaries %>%
sf::st_transform(sf::st_crs(pivotShape)) %>% sf::st_transform(sf::st_crs(pivotShape)) %>% dplyr::filter(field %in% pivotName)
dplyr::filter(field %in% pivotName)
# Create the maps for different timepoints # Create the maps for different timepoints
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2, CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE, show_legend = TRUE, legend_is_portrait = TRUE,
week = week_minus_2, age = age - 2, borders = borders, week = week_minus_2, age = age - 2, borders = borders)
use_breaks = use_breaks)
CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2, joined_spans2, CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE, show_legend = FALSE, legend_is_portrait = FALSE,
week = week_minus_1, age = age - 1, borders = borders, week = week_minus_1, age = age - 1, borders = borders)
use_breaks = use_breaks)
CImap <- create_CI_map(singlePivot, AllPivots2, joined_spans2, CImap <- create_CI_map(singlePivot, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE, show_legend = FALSE, legend_is_portrait = FALSE,
week = week, age = age, borders = borders, week = week, age = age, borders = borders)
use_breaks = use_breaks) # Create difference maps - only show legend on the second one to avoid redundancy
# Create difference maps - only show legend on the second one to avoid redundancy
CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week, AllPivots2, joined_spans2, CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = TRUE, show_legend = FALSE, legend_is_portrait = FALSE,
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, week_1 = week, week_2 = week_minus_1, age = age, borders = borders)
use_breaks = use_breaks)
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2, CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE, show_legend = TRUE, legend_is_portrait = TRUE,
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, week_1 = week, week_2 = week_minus_3, age = age, borders = borders)
use_breaks = use_breaks)
# Arrange the maps # Arrange the maps
tst <- tmap_arrange(CImap_m2, CImap_m1, CImap, CI_max_abs_last_week, CI_max_abs_three_week, nrow = 1) tst <- tmap_arrange(CImap_m2, CImap_m1, CImap, CI_max_abs_last_week, CI_max_abs_three_week, nrow = 1)

View file

@ -0,0 +1,409 @@
<!-- filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\system_architecture.md -->
# SmartCane System Architecture
## Overview
The SmartCane system is a comprehensive agricultural intelligence platform that processes satellite imagery and farm data to provide agronomic insights for sugarcane farmers. The system architecture follows a modular, layered approach with clear separation of concerns between data acquisition, processing, and presentation.
## Architectural Layers
The SmartCane system follows a layered architecture pattern, which is a standard approach in software engineering for organizing complex systems. This architecture divides the system into distinct functional layers, each with specific responsibilities. While these layers aren't explicitly shown as separate visual elements in the diagrams, they help conceptualize how components are organized by their function:
### 1. Data Acquisition Layer
- **Role**: Responsible for fetching raw data from external sources and user inputs
- **Components**: Manual Sentinel Hub Requests, Python API Downloader, User Input Interface
- **Functions**: Sets up manual requests on the Sentinel Hub Requests Builder for specific client fields; connects to satellite data providers; downloads imagery; manages API credentials; and performs preliminary data validation
### 2. Processing Layer (SmartCane Engine)
- **Role**: Core analytical engine that transforms raw data into actionable insights
- **Components**: Python API Downloader (pre-processing), R Processing Engine (analytics)
- **Functions**: Image processing, cloud masking, crop index calculation, field boundary processing, statistical analysis, report generation
### 3. Presentation Layer
- **Role**: Delivers insights to end users in accessible formats
- **Components**: Laravel Web App, Email Delivery System
- **Functions**: Interactive dashboards, visualization, report delivery, user management, project scheduling
### 4. Data Storage Layer
- **Role**: Persists system data across processing cycles
- **Components**: File System, Database
- **Functions**: Stores raw imagery, processed rasters, analytical results, user data, configuration
## Key Subsystems
### 1. Python API Downloader
- **Role**: Acquires and pre-processes satellite imagery
- **Inputs**: API credentials, field boundaries, date parameters, evaluation scripts
- **Outputs**: Raw satellite images, merged GeoTIFFs, virtual rasters
- **Interfaces**: External satellite APIs (Planet via Sentinel Hub), file system
- **Orchestration**: Triggered by shell scripts from the Laravel application
### 2. R Processing Engine
- **Role**: Performs advanced analytics and generates insights
- **Inputs**: Processed satellite imagery, field boundaries, harvest data, project parameters
- **Outputs**: Crop indices, mosaics, RDS data files, agronomic reports
- **Interfaces**: File system, report templates
- **Orchestration**: Triggered by shell scripts from the Laravel application
### 3. Laravel Web Application
- **Role**: Provides operator interface and orchestrates the overall system
- **Inputs**: User data, configuration settings
- **Outputs**: Web interface, scheduling, report delivery
- **Interfaces**: Users, database, file system
- **Orchestration**: Controls execution of the SmartCane Engine via shell scripts
### 4. Shell Script Orchestration
- **Role**: Bridges between web application and processing components
- **Functions**: Triggers processing workflows, manages execution environment, handles errors
- **Examples**: runcane.sh, runpython.sh, build_mosaic.sh, build_report.sh
## Data Flow
1. **Input Stage**:
- Operators (internal team) manually prepare and submit requests on Sentinel Hub Requests Builder for the specific fields of a client.
- Operators (internal team) provide farm data (field boundaries, harvest data) via the Laravel Web App.
- The system schedules data acquisition for specific dates/regions.
2. **Acquisition Stage**:
- Laravel triggers Python API Downloader via shell scripts
- Python connects to satellite data providers and downloads raw imagery
- Downloaded data is stored in the file system
3. **Processing Stage**:
- Laravel triggers R Processing Engine via shell scripts
- R scripts read satellite imagery and farm data
- Processing produces crop indices, analytics, and reports
- Results are stored in the file system
4. **Output Stage**:
- Laravel Web App accesses processed results
- Reports are delivered to users via email
## System Integration Points
- **Python-R Integration**: Data handover via file system (GeoTIFF, virtual rasters)
- **Engine-Laravel Integration**: Orchestration via shell scripts, data exchange via file system and database
- **User-System Integration**: Web interface, file uploads, email notifications
## Developed/Customized Elements
- **Custom Cloud Masking Algorithm**: Specialized for agricultural applications in tropical regions
- **Crop Index Extraction Pipeline**: Tailored to sugarcane spectral characteristics
- **Reporting Templates**: Designed for agronomic decision support
- **Shell Script Orchestration**: Custom workflow management for the system's components
## Strategic Role of Satellite Data
Satellite data is central to the SmartCane system, providing:
- Regular, non-invasive field monitoring
- Detection of spatial patterns not visible from ground level
- Historical analysis of crop performance
- Early warning of crop stress or disease
- Quantification of field variability for precision agriculture
## Pilot Utilization Sites
The SmartCane system is currently operational in Mozambique, Kenya, and Tanzania. Future pilot deployments and expansions are planned for Uganda, Colombia, Mexico, Guatemala, South Africa, and Zambia.
---
## System Architecture Diagrams
Below are diagrams illustrating the system architecture from different perspectives.
### Overall System Architecture
This diagram provides a high-level overview of the complete SmartCane system, showing how major components interact. It focuses on the system boundaries and main data flows between the Python API Downloader, R Processing Engine, Laravel Web App, and data storage components. This view helps understand how the system works as a whole.
```mermaid
graph TD
A["fa:fa-satellite External Satellite Data Providers API"] --> PyDL["fa:fa-download Python API Downloader"];
C["fa:fa-users Users: Farm Data Input e.g., GeoJSON, Excel"] --> D{"fa:fa-laptop-code Laravel Web App"};
subgraph SmartCane System
PyDL --> G["fa:fa-folder-open File System: Raw Satellite Imagery, Rasters, RDS, Reports, Boundaries"];
E["fa:fa-cogs R Processing Engine"] -- Reads --> G;
E -- Writes --> G;
D -- Manages/Triggers --> F["fa:fa-terminal Shell Script Orchestration"];
F -- Executes --> PyDL;
F -- Executes --> E;
D -- Manages/Accesses --> G;
D -- Reads/Writes --> H["fa:fa-database Database: Project Metadata, Users, Schedules"];
E -- Generates --> I["fa:fa-file-alt Agronomic Reports: DOCX, HTML"];
D -- Accesses/Delivers --> I;
end
D --> J["fa:fa-desktop Users: Web Interface (future)"];
I -- Via Email (SMTP) --> K["fa:fa-envelope Users: Email Reports"];
style E fill:#f9f,stroke:#333,stroke-width:2px
style D fill:#bbf,stroke:#333,stroke-width:2px
style PyDL fill:#ffdd57,stroke:#333,stroke-width:2px
```
### R Processing Engine Detail
This diagram zooms in on the R Processing Engine subsystem, detailing the internal components and data flow. It shows how raw satellite imagery and field data progress through various R scripts to produce crop indices and reports. The diagram highlights the data transformation pipeline within this analytical core of the SmartCane system.
```mermaid
graph TD
subgraph R Processing Engine
direction TB
subgraph Inputs
SatelliteImages["fa:fa-image Raw Satellite Imagery"]
FieldBoundaries["fa:fa-map-marker-alt Field Boundaries .geojson"]
HarvestData["fa:fa-file-excel Harvest Data .xlsx"]
ProjectParams["fa:fa-file-code Project Parameters .R"]
end
subgraph Core R Scripts & Processes
ParamConfig("fa:fa-cogs parameters_project.R")
MosaicScript("fa:fa-images mosaic_creation.R")
CIExtractionScript("fa:fa-microscope ci_extraction.R")
ReportUtils("fa:fa-tools executive_report_utils.R")
DashboardRmd("fa:fa-tachometer-alt CI_report_dashboard_planet_enhanced.Rmd")
SummaryRmd("fa:fa-list-alt CI_report_executive_summary.Rmd")
end
subgraph Outputs
WeeklyMosaics["fa:fa-file-image Weekly Mosaics .tif"]
CIDataRDS["fa:fa-database CI Data .rds"]
CIRasters["fa:fa-layer-group CI Rasters .tif"]
DashboardReport["fa:fa-chart-bar Dashboard Report .docx/.html"]
SummaryReport["fa:fa-file-invoice Executive Summary .docx/.html"]
end
%% Data Flow
ProjectParams --> ParamConfig;
SatelliteImages --> MosaicScript;
FieldBoundaries --> MosaicScript;
ParamConfig --> MosaicScript;
MosaicScript --> WeeklyMosaics;
WeeklyMosaics --> CIExtractionScript;
FieldBoundaries --> CIExtractionScript;
ParamConfig --> CIExtractionScript;
CIExtractionScript --> CIDataRDS;
CIExtractionScript --> CIRasters;
CIDataRDS --> ReportUtils;
CIRasters --> ReportUtils;
HarvestData --> ReportUtils;
ParamConfig --> ReportUtils;
ReportUtils --> DashboardRmd;
ReportUtils --> SummaryRmd;
ParamConfig --> DashboardRmd;
ParamConfig --> SummaryRmd;
DashboardRmd --> DashboardReport;
SummaryRmd --> SummaryReport;
end
ShellOrchestration["fa:fa-terminal Shell Scripts e.g., build_mosaic.sh, build_report.sh"] -->|Triggers| R_Processing_Engine["fa:fa-cogs R Processing Engine"]
style R_Processing_Engine fill:#f9f,stroke:#333,stroke-width:2px
style Inputs fill:#ccf,stroke:#333,stroke-width:1px
style Outputs fill:#cfc,stroke:#333,stroke-width:1px
style Core_R_Scripts_Processes fill:#ffc,stroke:#333,stroke-width:1px
```
### Python API Downloader Detail
This diagram focuses on the Python API Downloader subsystem, showing its internal components and workflow. It illustrates how API credentials, field boundaries, and other inputs are processed through various Python functions to download, process, and prepare satellite imagery. This view reveals the technical implementation details of the data acquisition layer.
```mermaid
graph TD
subgraph Python API Downloader
direction TB
subgraph Inputs_Py [Inputs]
APICreds["fa:fa-key API Credentials (SH_CLIENT_ID, SH_CLIENT_SECRET)"]
DateRangeParams["fa:fa-calendar-alt Date Range Parameters (days_needed, specific_date)"]
GeoJSONInput["fa:fa-map-marker-alt Field Boundaries (pivot.geojson)"]
ProjectConfig["fa:fa-cogs Project Configuration (project_name, paths)"]
EvalScripts["fa:fa-file-code Evalscripts (JS for cloud masking & band selection)"]
end
subgraph Core_Python_Logic_Py [Core Python Logic & Libraries]
SetupConfig["fa:fa-cog SentinelHubConfig & BYOC Definition"]
DateSlotGen["fa:fa-calendar-check Date Slot Generation (slots)"]
GeoProcessing["fa:fa-map GeoJSON Parsing & BBox Splitting (geopandas, BBoxSplitter)"]
AvailabilityCheck["fa:fa-search-location Image Availability Check (SentinelHubCatalog)"]
RequestHandler["fa:fa-paper-plane Request Generation (SentinelHubRequest, get_true_color_request_day)"]
DownloadClient["fa:fa-cloud-download-alt Image Download (SentinelHubDownloadClient, download_function)"]
MergeUtility["fa:fa-object-group Tile Merging (gdal.BuildVRT, gdal.Translate, merge_files)"]
CleanupUtility["fa:fa-trash-alt Intermediate File Cleanup (empty_folders)"]
end
subgraph Outputs_Py [Outputs]
RawSatImages["fa:fa-file-image Raw Downloaded Satellite Imagery Tiles (response.tiff in dated subfolders)"]
MergedTifs["fa:fa-images Merged TIFs (merged_tif/{slot}.tif)"]
VirtualRasters["fa:fa-layer-group Virtual Rasters (merged_virtual/merged{slot}.vrt)"]
DownloadLogs["fa:fa-file-alt Console Output Logs (print statements)"]
end
ExternalSatAPI["fa:fa-satellite External Satellite Data Providers API (Planet via Sentinel Hub)"]
%% Data Flow for Python Downloader
APICreds --> SetupConfig;
DateRangeParams --> DateSlotGen;
GeoJSONInput --> GeoProcessing;
ProjectConfig --> SetupConfig;
ProjectConfig --> GeoProcessing;
ProjectConfig --> MergeUtility;
ProjectConfig --> CleanupUtility;
EvalScripts --> RequestHandler;
DateSlotGen -- Available Slots --> AvailabilityCheck;
GeoProcessing -- BBox List --> AvailabilityCheck;
SetupConfig --> AvailabilityCheck;
AvailabilityCheck -- Filtered Slots & BBoxes --> RequestHandler;
RequestHandler -- Download Requests --> DownloadClient;
SetupConfig --> DownloadClient;
DownloadClient -- Downloads Data From --> ExternalSatAPI;
ExternalSatAPI -- Returns Image Data --> DownloadClient;
DownloadClient -- Writes --> RawSatImages;
DownloadClient -- Generates --> DownloadLogs;
RawSatImages --> MergeUtility;
MergeUtility -- Writes --> MergedTifs;
MergeUtility -- Writes --> VirtualRasters;
end
ShellOrchestratorPy["fa:fa-terminal Shell Scripts (e.g., runpython.sh triggering planet_download.ipynb)"] -->|Triggers| Python_API_Downloader["fa:fa-download Python API Downloader"];
style Python_API_Downloader fill:#ffdd57,stroke:#333,stroke-width:2px
style Inputs_Py fill:#cdeeff,stroke:#333,stroke-width:1px
style Outputs_Py fill:#d4efdf,stroke:#333,stroke-width:1px
style Core_Python_Logic_Py fill:#fff5cc,stroke:#333,stroke-width:1px
style ExternalSatAPI fill:#f5b7b1,stroke:#333,stroke-width:2px
```
### SmartCane Engine Integration Diagram
This diagram illustrates the integration of Python and R components within the SmartCane Engine. Unlike the first diagram that shows the overall system, this one specifically focuses on how the two processing components interact with each other and the rest of the system. It emphasizes the orchestration layer and data flows between the core processing components and external systems.
```mermaid
graph TD
%% External Systems & Users
Users_DataInput["fa:fa-user Users: Farm Data Input (GeoJSON, Excel, etc.)"] --> Laravel_WebApp;
ExternalSatAPI["fa:fa-satellite External Satellite Data Providers API"];
%% Main Application Components
Laravel_WebApp["fa:fa-globe Laravel Web App (Frontend & Control Plane)"];
Shell_Orchestration["fa:fa-terminal Shell Script Orchestration (e.g., runcane.sh, runpython.sh, build_mosaic.sh)"];
subgraph SmartCane_Engine ["SmartCane Engine (Data Processing Core)"]
direction TB
Python_Downloader["fa:fa-download Python API Downloader"];
R_Engine["fa:fa-chart-line R Processing Engine"];
end
%% Data Storage
FileSystem["fa:fa-folder File System (Raw Imagery, Rasters, RDS, Reports, Boundaries)"];
Database["fa:fa-database Database (Project Metadata, Users, Schedules)"];
%% User Outputs
Users_WebView["fa:fa-desktop Users: Web Interface (future)"];
Users_EmailReports["fa:fa-envelope Users: Email Reports (Agronomic Reports)"];
AgronomicReports["fa:fa-file-alt Agronomic Reports (DOCX, HTML)"];
%% --- Data Flows & Interactions ---
%% Laravel to Orchestration & Engine
Laravel_WebApp -- Manages/Triggers --> Shell_Orchestration;
Shell_Orchestration -- Executes --> Python_Downloader;
Shell_Orchestration -- Executes --> R_Engine;
%% Python Downloader within Engine
ExternalSatAPI -- Satellite Data --> Python_Downloader;
Python_Downloader -- Writes Raw Data --> FileSystem;
%% Inputs to Python (simplified for this view - details in Python-specific diagram)
%% Laravel_WebApp -- Provides Config/Boundaries --> Python_Downloader;
%% R Engine within Engine
%% Inputs to R (simplified - details in R-specific diagram)
%% Laravel_WebApp -- Provides Config/Boundaries --> R_Engine;
R_Engine -- Reads Processed Data/Imagery --> FileSystem;
R_Engine -- Writes Derived Products --> FileSystem;
R_Engine -- Generates --> AgronomicReports;
%% Laravel interaction with Data Storage
Laravel_WebApp -- Manages/Accesses --> FileSystem;
Laravel_WebApp -- Reads/Writes --> Database;
%% Output Delivery
Laravel_WebApp --> Users_WebView;
AgronomicReports --> Users_EmailReports;
%% Assuming a mechanism like SMTP, potentially triggered by Laravel or R-Engine completion
Laravel_WebApp -- Delivers/Displays --> AgronomicReports;
%% Styling
style SmartCane_Engine fill:#e6ffe6,stroke:#333,stroke-width:2px
style Python_Downloader fill:#ffdd57,stroke:#333,stroke-width:2px
style R_Engine fill:#f9f,stroke:#333,stroke-width:2px
style Laravel_WebApp fill:#bbf,stroke:#333,stroke-width:2px
style Shell_Orchestration fill:#f0ad4e,stroke:#333,stroke-width:2px
style FileSystem fill:#d1e0e0,stroke:#333,stroke-width:1px
style Database fill:#d1e0e0,stroke:#333,stroke-width:1px
style ExternalSatAPI fill:#f5b7b1,stroke:#333,stroke-width:2px
style AgronomicReports fill:#d4efdf,stroke:#333,stroke-width:1px
```
## Future Directions
The SmartCane platform is poised for significant evolution, with several key enhancements and new capabilities planned to further empower users and expand its utility:
- **Advanced Management Dashboard**: Development of a more comprehensive and interactive management dashboard to provide users with deeper insights and greater control over their operations.
- **Enhanced Yield Prediction Models**: Improving the accuracy and granularity of yield predictions by incorporating more variables and advanced machine learning techniques.
- **Integrated Weather and Irrigation Advice**: Leveraging weather forecast data and soil moisture information (potentially from new data sources) to provide precise irrigation scheduling and weather-related agronomic advice.
- **AI-Guided Agronomic Advice**: Implementing sophisticated AI algorithms to analyze integrated data (satellite, weather, soil, farm practices) and offer tailored, actionable agronomic recommendations.
- **Automated Advice Generation**: Developing capabilities for the system to automatically generate and disseminate critical advice and alerts to users based on real-time data analysis.
- **Expanded Data Source Integration**:
- **Radar Data**: Incorporating radar satellite imagery (e.g., Sentinel-1) for all-weather monitoring capabilities, particularly useful during cloudy seasons for assessing crop structure, soil moisture, and biomass.
- **IoT and Ground Sensors**: Integrating data from in-field IoT devices and soil sensors for highly localized and continuous monitoring of environmental and soil conditions.
- **Client-Facing Portal**: Exploration and potential development of a client-facing portal to allow end-users direct access to their data, dashboards, and reports, complementing the current internal management interface.
These future developments aim to transform SmartCane into an even more powerful decision support system, fostering sustainable and efficient agricultural practices.
## Conclusion and Integration Summary
The SmartCane system architecture demonstrates a well-integrated solution that combines different technologies and subsystems to solve complex agricultural challenges. Here is a summary of how the key subsystems work together:
### Subsystem Integration
1. **Data Flow Sequence**
- The Laravel Web App initiates the workflow and manages user interactions
- Shell scripts orchestrate the execution sequence of the processing subsystems
- The Python API Downloader acquires raw data from external sources
- The R Processing Engine transforms this data into actionable insights
- Results flow back to users through the web interface and email reports
2. **Technology Integration**
- **Python + R**: Different programming languages are leveraged for their respective strengths—Python for API communication and data acquisition, R for statistical analysis and report generation
- **Laravel + Processing Engine**: Clear separation between web presentation layer and computational backend
- **File System + Database**: Hybrid data storage approach with file system for imagery and reports, database for metadata and user information
3. **Key Integration Mechanisms**
- **File System Bridge**: The different subsystems primarily communicate through standardized file formats (GeoTIFF, GeoJSON, RDS, DOCX)
- **Shell Script Orchestration**: Acts as the "glue" between subsystems, ensuring proper execution sequence and environment setup
- **Standardized Data Formats**: Use of widely-accepted geospatial and data formats enables interoperability
4. **Extensibility and Scalability**
- The modular architecture allows for replacement or enhancement of individual components
- The clear subsystem boundaries enable parallel development and testing
- Standard interfaces simplify integration of new data sources, algorithms, or output methods
The SmartCane architecture balances complexity with maintainability by using well-established technologies and clear boundaries between subsystems. The separation of concerns between data acquisition, processing, and presentation layers ensures that changes in one area minimally impact others, while the consistent data flow pathways ensure that information moves smoothly through the system.

View file

@ -1,68 +0,0 @@
# test_date_functions.R
#
# Tests for the date-related helpers in ci_extraction_utils.R
# (date_list() and date_extract()).
#
# Load the shared test framework.
# NOTE(review): this path is relative to the project root, while the
# source() call below is relative to the tests/ directory -- confirm
# which working directory these tests are expected to run from.
source("tests/test_framework.R")
# Prepare the mock project structure and minimal package set
env <- setup_test_env()
# Load the functions under test
source("../ci_extraction_utils.R")
# date_list(): structure and contents of generated date windows
test_that("date_list creates correct date sequences", {
  # A 7-day window ending on 2023-01-15
  window <- date_list(as.Date("2023-01-15"), 7)

  # Returned structure
  expect_type(window, "list")
  expect_equal(
    names(window),
    c("week", "year", "days_filter", "start_date", "end_date")
  )

  # Returned values
  expect_equal(window$week, lubridate::week(as.Date("2023-01-09")))
  expect_equal(window$year, 2023)
  expect_equal(window$start_date, as.Date("2023-01-09"))
  expect_equal(window$end_date, as.Date("2023-01-15"))
  expect_equal(length(window$days_filter), 7)
  expect_equal(window$days_filter[1], "2023-01-09")
  expect_equal(window$days_filter[7], "2023-01-15")

  # A shorter window
  short_window <- date_list(as.Date("2023-01-15"), 3)
  expect_equal(length(short_window$days_filter), 3)
  expect_equal(
    short_window$days_filter,
    c("2023-01-13", "2023-01-14", "2023-01-15")
  )

  # An end date supplied as a string is accepted as well
  string_window <- date_list("2023-01-15", 5)
  expect_equal(
    string_window$days_filter,
    c("2023-01-11", "2023-01-12", "2023-01-13", "2023-01-14", "2023-01-15")
  )

  # Invalid inputs are rejected with informative errors
  expect_error(date_list("invalid-date", 7), "Invalid end_date provided")
  expect_error(date_list("2023-01-15", -1), "Invalid offset provided")
})
# date_extract(): pulling an ISO date out of assorted file-path shapes
test_that("date_extract correctly extracts dates from file paths", {
  # Every one of these paths embeds the same date
  valid_paths <- c(
    "/some/path/2023-01-15_image.tif",
    "path/to/planet_2023-01-15.tif",
    "c:\\path\\with\\windows\\2023-01-15_file.tif",
    "2023-01-15.tif",
    "prefix-2023-01-15-suffix.tif"
  )
  for (path in valid_paths) {
    expect_equal(date_extract(path), "2023-01-15")
  }

  # A path without a date warns and yields NA
  expect_warning(extracted <- date_extract("no-date-here.tif"), "Could not extract date")
  expect_true(is.na(extracted))
})

# Tear down the test environment
teardown_test_env()

# Report completion
cat("Date function tests completed successfully\n")

View file

@ -1,120 +0,0 @@
# test_framework.R
#
# TEST FRAMEWORK FOR SMARTCANE
# ===========================
# This script provides a simple testing framework for the SmartCane project.
# It includes utilities for setting up test environments and running tests.
#
# Ensure testthat is installed, then attach it. requireNamespace() is the
# idiomatic availability check: unlike require(), it does not attach the
# package as a side effect (the explicit library() below does that).
if (!requireNamespace("testthat", quietly = TRUE)) {
  install.packages("testthat", repos = "https://cran.rstudio.com/")
}
library(testthat)

# Define paths for testing.
# NOTE(review): both paths hang off the parent of the current working
# directory, so the framework must be launched from one level below the
# project root -- confirm this matches how the test runner invokes it.
test_root <- file.path(normalizePath(".."), "tests")
test_data_dir <- file.path(test_root, "test_data")

# Create test directories if they don't exist
dir.create(test_data_dir, recursive = TRUE, showWarnings = FALSE)
# Set up a test environment with all necessary data.
#
# Sources the project package loader, attaches the minimal set of packages
# the tests need (warning, not failing, when one is missing), installs a
# lightweight log_message() stub in the global environment, and creates a
# mock project directory structure under test_data_dir.
#
# Returns: a named list of test-project paths; every character-valued
# entry has been created on disk.
setup_test_env <- function() {
  # Add the working directory's parent to the library search path.
  # NOTE(review): .libPaths() expects package-library directories; adding
  # the project root here looks suspicious -- confirm it is intentional.
  .libPaths(c(.libPaths(), normalizePath("..")))

  # Source required files with minimal dependencies. Failures here only
  # warn so that tests with no heavy dependencies can still run.
  tryCatch({
    source(file.path(normalizePath(".."), "packages.R"))
    # Load minimal dependencies for tests
    required_packages <- c("lubridate", "stringr", "purrr", "dplyr", "testthat")
    for (pkg in required_packages) {
      if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
        warning(paste("Package", pkg, "not available, some tests may fail"))
      }
    }
  }, error = function(e) {
    warning("Error loading dependencies: ", conditionMessage(e))
  })

  # Provide a minimal logger so sourced project code that calls
  # log_message() works without the full application bootstrap.
  assign("log_message", function(message, level = "INFO") {
    cat(paste0("[", level, "] ", message, "\n"))
  }, envir = .GlobalEnv)

  # Mock project structure used by the tests
  test_project <- list(
    project_dir = "test_project",
    data_dir = test_data_dir,
    daily_CI_vals_dir = file.path(test_data_dir, "extracted_ci", "daily_vals"),
    cumulative_CI_vals_dir = file.path(test_data_dir, "extracted_ci", "cumulative_vals"),
    merged_final = file.path(test_data_dir, "merged_final"),
    daily_vrt = file.path(test_data_dir, "daily_vrt")
  )

  # Create every path-valued entry
  for (dir in test_project) {
    if (is.character(dir)) {
      dir.create(dir, recursive = TRUE, showWarnings = FALSE)
    }
  }

  return(test_project)
}
# Tear down the test environment.
# Intentionally a no-op: directories created by setup_test_env() are left
# on disk so failures can be inspected after a run.
teardown_test_env <- function() {
  # Clean up only test-created files if needed
  # We'll leave the main directories for inspection
}
# Discover and source every test file under test_root.
#
# pattern: regular expression a file name must match to be run.
# Returns TRUE when all matched files ran without error, FALSE otherwise
# (including when no files matched).
run_tests <- function(pattern = "^test_.+\\.R$") {
  candidates <- list.files(
    path = test_root,
    pattern = pattern,
    full.names = TRUE
  )
  # This framework file matches the pattern itself; skip it.
  candidates <- candidates[!grepl("test_framework\\.R$", candidates)]

  if (length(candidates) == 0) {
    cat("No test files found matching pattern:", pattern, "\n")
    return(FALSE)
  }

  cat("Found", length(candidates), "test files:\n")
  cat(paste(" -", basename(candidates)), sep = "\n")
  cat("\n")

  # Source one file, reporting success/failure on the console.
  run_one <- function(path) {
    cat("Running tests in:", basename(path), "\n")
    tryCatch({
      source(path, local = TRUE)
      cat("✓ Tests completed\n\n")
      TRUE
    }, error = function(e) {
      cat("✗ Error:", e$message, "\n\n")
      FALSE
    })
  }
  passed <- vapply(candidates, run_one, logical(1), USE.NAMES = FALSE)

  # Summary
  cat("\nTest Summary:", sum(passed), "of", length(candidates),
      "test files completed successfully\n")
  all(passed)
}
# If this script is run directly, run all tests.
#
# BUGFIX: sys.frame(1)$ofile only exists while the file is being
# source()d; when launched directly via `Rscript test_framework.R` there
# is no frame 1 and the unguarded access throws -- i.e. the original
# condition errored in exactly the non-interactive direct-run case it was
# meant to detect. Prefer the --file= command-line argument and fall back
# to the source() frame, swallowing the error when neither applies.
if (!interactive()) {
  file_arg <- grep("^--file=", commandArgs(FALSE), value = TRUE)
  script_name <- if (length(file_arg) > 0) {
    basename(sub("^--file=", "", file_arg[1]))
  } else {
    tryCatch(basename(sys.frame(1)$ofile), error = function(e) "")
  }
  if (identical(script_name, "test_framework.R")) {
    setup_test_env()
    run_tests()
    teardown_test_env()
  }
}

View file

@ -1,280 +0,0 @@
# test_report_utils.R
#
# Tests for the visualization functions in report_utils.R
# (create_CI_map, create_CI_diff_map, ci_plot, cum_ci_plot, get_week_path).
#
# Load the shared test framework.
# NOTE(review): this path is relative to the project root, while the
# source() call below is relative to the tests/ directory -- confirm the
# intended working directory.
source("tests/test_framework.R")
# Prepare the mock project structure and minimal package set
env <- setup_test_env()
# Required libraries for building spatial fixtures and plots
library(testthat)
library(terra)
library(sf)
library(dplyr)
library(ggplot2)
# Load the functions under test
source("../report_utils.R")
# Build the fixtures shared by the report_utils tests: a 10x10 CI raster,
# two square field polygons, harvest metadata, and weekly CI quadrant data.
# Returns a named list: raster, field_boundaries, harvesting_data,
# ci_quadrant.
create_mock_data <- function() {
  # 10x10 raster over [0, 10]^2 with cell values 1..100, named "CI"
  ci_raster <- terra::rast(
    nrows = 10, ncols = 10,
    xmin = 0, xmax = 10, ymin = 0, ymax = 10,
    vals = 1:100
  )
  names(ci_raster) <- "CI"

  # Axis-aligned square polygon from opposite corners
  square <- function(x0, y0, x1, y1) {
    sf::st_polygon(list(rbind(
      c(x0, y0), c(x1, y0), c(x1, y1), c(x0, y1), c(x0, y0)
    )))
  }

  # Two simple field boundaries sharing the raster's CRS
  boundaries <- sf::st_sf(
    field = c("Field1", "Field2"),
    sub_field = c("A", "B"),
    geometry = sf::st_sfc(
      square(1, 1, 5, 5),
      square(6, 6, 9, 9)
    ),
    crs = sf::st_crs(ci_raster)
  )

  # Harvest metadata for both fields
  harvest <- data.frame(
    field = c("Field1", "Field2"),
    sub_field = c("A", "B"),
    age = c(100, 150),
    season_start = as.Date(c("2023-01-01", "2023-02-01")),
    year = c(2023, 2023)
  )

  # Ten weekly CI observations per field
  quadrant <- data.frame(
    field = rep(c("Field1", "Field2"), each = 10),
    sub_field = rep(c("A", "B"), each = 10),
    Date = rep(seq(as.Date("2023-01-01"), by = "week", length.out = 10), 2),
    DOY = rep(1:10, 2),
    cumulative_CI = rep(cumsum(1:10), 2),
    value = rep(1:10, 2),
    season = rep(2023, 20),
    model = rep(c("northwest", "northeast", "southwest", "southeast"), 5)
  )

  list(
    raster = ci_raster,
    field_boundaries = boundaries,
    harvesting_data = harvest,
    ci_quadrant = quadrant
  )
}
# create_CI_map(): object construction and required-argument validation
test_that("create_CI_map creates a valid tmap object", {
  fixtures <- create_mock_data()
  field1 <- fixtures$field_boundaries[1, ]

  # A fully-specified call should yield a tmap object.
  # NOTE(review): use_breaks looks obsolete under the tmap v4 API --
  # confirm whether it is still accepted.
  map <- create_CI_map(
    pivot_raster = fixtures$raster,
    pivot_shape = field1,
    pivot_spans = field1,
    week = "01",
    age = 10,
    borders = TRUE,
    use_breaks = TRUE
  )
  expect_true(inherits(map, "tmap"))

  # Missing mandatory arguments must fail with clear messages
  expect_error(
    create_CI_map(
      pivot_shape = field1,
      pivot_spans = field1,
      week = "01", age = 10
    ),
    "pivot_raster is required"
  )
  expect_error(
    create_CI_map(
      pivot_raster = fixtures$raster,
      pivot_spans = field1,
      week = "01", age = 10
    ),
    "pivot_shape is required"
  )
})
# create_CI_diff_map(): object construction and required-argument validation
test_that("create_CI_diff_map creates a valid tmap object", {
  fixtures <- create_mock_data()
  field1 <- fixtures$field_boundaries[1, ]

  # A fully-specified call should yield a tmap object
  diff_map <- create_CI_diff_map(
    pivot_raster = fixtures$raster,
    pivot_shape = field1,
    pivot_spans = field1,
    week_1 = "01",
    week_2 = "02",
    age = 10,
    borders = TRUE,
    use_breaks = TRUE
  )
  expect_true(inherits(diff_map, "tmap"))

  # Missing mandatory arguments must fail with clear messages
  expect_error(
    create_CI_diff_map(
      pivot_shape = field1,
      pivot_spans = field1,
      week_1 = "01", week_2 = "02", age = 10
    ),
    "pivot_raster is required"
  )
  expect_error(
    create_CI_diff_map(
      pivot_raster = fixtures$raster,
      pivot_spans = field1,
      week_1 = "01", age = 10
    ),
    "week_1 and week_2 parameters are required"
  )
})
# ci_plot(): argument validation under silenced console output
test_that("ci_plot handles input parameters correctly", {
  fixtures <- create_mock_data()

  # Redirect console output so the test log stays readable
  sink_file <- tempfile()
  sink(sink_file)

  # A fully-specified call should not error
  expect_error(
    ci_plot(
      pivotName = "Field1",
      field_boundaries = fixtures$field_boundaries,
      current_ci = fixtures$raster,
      ci_minus_1 = fixtures$raster,
      ci_minus_2 = fixtures$raster,
      last_week_diff = fixtures$raster,
      three_week_diff = fixtures$raster,
      harvesting_data = fixtures$harvesting_data,
      week = "01",
      week_minus_1 = "52",
      week_minus_2 = "51",
      week_minus_3 = "50",
      use_breaks = TRUE,
      borders = TRUE
    ),
    NA # Expect no error
  )

  # Calling with nothing at all must fail on the first required argument
  expect_error(
    ci_plot(),
    "pivotName is required"
  )

  # A field absent from the boundaries should raise some error
  expect_error(
    ci_plot(
      pivotName = "NonExistentField",
      field_boundaries = fixtures$field_boundaries,
      current_ci = fixtures$raster,
      ci_minus_1 = fixtures$raster,
      ci_minus_2 = fixtures$raster,
      last_week_diff = fixtures$raster,
      three_week_diff = fixtures$raster,
      harvesting_data = fixtures$harvesting_data
    ),
    regexp = NULL # We expect some error related to the field not being found
  )

  # Restore console output
  sink()
  unlink(sink_file)
})
# cum_ci_plot(): plot-type handling under silenced console output
test_that("cum_ci_plot handles input parameters correctly", {
  fixtures <- create_mock_data()

  # Redirect console output so the test log stays readable
  sink_file <- tempfile()
  sink(sink_file)

  # A fully-specified call should not error
  expect_error(
    cum_ci_plot(
      pivotName = "Field1",
      ci_quadrant_data = fixtures$ci_quadrant,
      plot_type = "value",
      facet_on = FALSE,
      x_unit = "days"
    ),
    NA # Expect no error
  )

  # The remaining valid plot types should also work
  for (plot_type in c("CI_rate", "cumulative_CI")) {
    expect_error(
      cum_ci_plot(
        pivotName = "Field1",
        ci_quadrant_data = fixtures$ci_quadrant,
        plot_type = plot_type
      ),
      NA # Expect no error
    )
  }

  # An unknown plot type must be rejected with the documented message
  expect_error(
    cum_ci_plot(
      pivotName = "Field1",
      ci_quadrant_data = fixtures$ci_quadrant,
      plot_type = "invalid_type"
    ),
    "plot_type must be one of: 'value', 'CI_rate', or 'cumulative_CI'"
  )

  # Calling with nothing at all must fail on the first required argument
  expect_error(
    cum_ci_plot(),
    "pivotName is required"
  )

  # Restore console output
  sink()
  unlink(sink_file)
})
# get_week_path(): ISO-week path resolution and argument validation
test_that("get_week_path returns correct path", {
  # 2023-01-15 falls in week 2 of 2023
  base_path <- get_week_path(
    mosaic_path = "ci_max_mosaics",
    input_date = "2023-01-15",
    week_offset = 0
  )
  expect_match(base_path, "week_02_2023\\.tif$", all = FALSE)

  # A -1 offset steps back one week
  shifted_path <- get_week_path(
    mosaic_path = "ci_max_mosaics",
    input_date = "2023-01-15",
    week_offset = -1
  )
  expect_match(shifted_path, "week_01_2023\\.tif$", all = FALSE)

  # Missing mandatory arguments must fail with clear messages
  expect_error(
    get_week_path(input_date = "2023-01-15", week_offset = 0),
    "mosaic_path is required"
  )
  expect_error(
    get_week_path(mosaic_path = "ci_max_mosaics", week_offset = 0),
    "input_date is required"
  )
})

# Tear down the test environment
teardown_test_env()

# Report completion
cat("Report utility function tests completed successfully\n")