Merge branch 'backward-compat-no-tiling' into code-improvements

This commit is contained in:
Timon 2026-02-10 12:23:18 +01:00
commit 51d479673d
36 changed files with 7997 additions and 4538 deletions

View file

@ -319,4 +319,4 @@ After each major stage, verify:
---
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`. For related Linear issues (code quality, architecture docs), see SC-59, SC-60, SC-61._
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`._

0
checkout Normal file
View file

View file

@ -22,9 +22,9 @@ Options:
--resolution RES Resolution in meters (default: 3)
--skip-merge Skip merge step (download only, keep individual tiles)
--cleanup Delete intermediate single_images folder after merge
--clear-singles Clear single_images_8b folder before download
--clear-merged Clear merged_tif_8b and merged_virtual_8b folders before download
--clear-all Clear all output folders (singles, merged, virtual) before download
--clear-singles Clear single_images folder before download
--clear-merged Clear merged_tif folder before download
--clear-all Clear all output folders (singles, merged) before download
Examples:
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
@ -89,20 +89,118 @@ def setup_config():
config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44')
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos')
# BYOC collection for Planet 8-band data
collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'
byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True)
catalog = SentinelHubCatalog(config=config)
return config, byoc, catalog
return config, catalog
def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple:
    """
    Auto-detect which Planet collection is available for this project.

    Checks a window of dates (backwards from date_str) to ensure robust detection:
    if ANY date has data in the new 8-band collection, use that; otherwise fall
    back to the legacy 4-band collection; if neither has data, default to the
    8-band collection and let the download attempt fail loudly later.

    Args:
        date_str: Reference date (YYYY-MM-DD)
        bbox_list: List of bounding boxes; only the first is used as the probe area
        catalog: SentinelHubCatalog instance
        date_range_days: Number of days to check backwards (default: 7)

    Returns:
        (byoc, collection_info_dict) where byoc is a DataCollection and the dict
        carries collection metadata ('collection_id', 'name', 'bands',
        'output_folder', 'singles_folder').
    """
    new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'  # 8-band (new)
    old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c'  # 4-band (legacy)
    test_bbox = bbox_list[0]

    # Single source of truth for the per-collection metadata (previously
    # duplicated three times inline).
    # NOTE(review): 'bands' is 4 for BOTH collections — the 8-band evalscript
    # visible in this file also emits only RGB+NIR, so this looks intentional,
    # but confirm against the evalscript if the 8-band output ever changes.
    new_info = {
        'collection_id': new_id,
        'name': 'planet_data_8b',
        'bands': 4,
        'output_folder': 'merged_tif',
        'singles_folder': 'single_images'
    }
    old_info = {
        'collection_id': old_id,
        'name': 'planet_data',
        'bands': 4,
        'output_folder': 'merged_tif',
        'singles_folder': 'single_images'
    }

    date_range = _build_date_range(date_str, date_range_days)

    print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...")
    print(f"  Test range: {date_range[-1]} to {date_range[0]}")

    # Try new collection first
    print(f"\n  Trying 8-band collection: {new_id}")
    byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True)
    if _collection_has_data(catalog, byoc_new, test_bbox, date_range):
        print(f"  ✓ Using 8-band collection")
        return byoc_new, new_info

    # No data in new collection, try legacy
    print(f"\n  ✗ No data found in 8-band collection")
    print(f"  Trying legacy 4-band collection: {old_id}")
    byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True)
    if _collection_has_data(catalog, byoc_old, test_bbox, date_range):
        print(f"  ✓ Using legacy 4-band collection")
        return byoc_old, old_info

    # Neither collection has data — default to the new collection so the
    # caller still gets a usable (byoc, info) pair.
    print(f"\n  ⚠️ No data found in either collection for {date_range_days} days")
    print(f"  Defaulting to 8-band collection (will attempt download anyway)")
    return byoc_new, new_info


def _build_date_range(date_str: str, date_range_days: int) -> List[str]:
    """Return date_range_days date strings (YYYY-MM-DD), counting backwards
    from date_str inclusive. Falls back to today on an unparseable date_str."""
    try:
        ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        print(f"⚠️ Invalid date format: {date_str}. Using today.")
        ref_date = datetime.datetime.now()
    return [
        (ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d')
        for i in range(date_range_days)
    ]


def _collection_has_data(catalog, byoc, test_bbox, date_range) -> bool:
    """Probe the catalog for each date in date_range; return True on the first
    date that yields at least one tile for the given collection.

    A failed search (API/network error) is treated as "no data for that date"
    and logged truncated, matching the original best-effort behavior.
    """
    for test_date in date_range:
        try:
            search = catalog.search(
                collection=byoc,
                bbox=test_bbox,
                time=(test_date, test_date),
                filter=None
            )
            tiles = list(search)
        except Exception as e:
            print(f"    ⚠️ {test_date}: {str(e)[:60]}")
            continue
        if tiles:
            print(f"  ✓ Found data on {test_date} ({len(tiles)} tiles)")
            return True
    return False
# ============================================================================
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
# ============================================================================
EVALSCRIPT_4BAND_MASKED = """
EVALSCRIPT_8BAND = """
//VERSION=3
function setup() {
return {
@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """
}
function evaluatePixel(sample) {
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
// This reduces output pixels and avoids NaN interpolation on client side
if (sample.udm1 == 0) {
// Scale reflectance: DN [0, 1] range
var scaledRed = 2.5 * sample.red / 10000;
var scaledGreen = 2.5 * sample.green / 10000;
var scaledBlue = 2.5 * sample.blue / 10000;
var scaledNIR = 2.5 * sample.nir / 10000;
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
} else {
return [NaN, NaN, NaN, NaN];
}
}
"""
EVALSCRIPT_4BAND_LEGACY = """
//VERSION=3
function setup() {
return {
input: [{
bands: ["red", "green", "blue", "nir", "udm1"],
units: "DN"
}],
output: {
bands: 4
}
};
}
function evaluatePixel(sample) {
// Cloud masking for legacy collection (same band names as new 8-band)
// udm1 = 0 means clear, non-zero means cloud/shadow/etc
if (sample.udm1 == 0) {
var scaledRed = 2.5 * sample.red / 10000;
var scaledGreen = 2.5 * sample.green / 10000;
var scaledBlue = 2.5 * sample.blue / 10000;
@ -289,6 +413,7 @@ def download_tile(
output_dir: Path,
config,
byoc,
evalscript: str,
resolution: int = 3
) -> bool:
"""Download a single full tile (no geometry masking = lower PU) with exponential backoff."""
@ -300,9 +425,9 @@ def download_tile(
try:
size = bbox_to_dimensions(bbox, resolution=resolution)
# Create download request with 4-band cloud-masked evalscript (uint16)
# Create download request with appropriate evalscript for collection
request = SentinelHubRequest(
evalscript=EVALSCRIPT_4BAND_MASKED,
evalscript=evalscript,
input_data=[
SentinelHubRequest.input_data(
data_collection=byoc,
@ -350,6 +475,8 @@ def download_date(
base_path: Path,
config,
byoc,
evalscript: str,
collection_info: dict,
resolution: int = 3
) -> int:
"""
@ -357,14 +484,14 @@ def download_date(
Returns number of successfully downloaded tiles.
"""
output_dir = base_path / 'single_images_8b' / date_str
output_dir = base_path / collection_info['singles_folder'] / date_str
output_dir.mkdir(parents=True, exist_ok=True)
print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...")
successful = 0
for idx, bbox in enumerate(bbox_list, 1):
if download_tile(date_str, bbox, output_dir, config, byoc, resolution):
if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution):
successful += 1
percentage = (idx / len(bbox_list)) * 100
@ -385,10 +512,10 @@ def download_date(
# MERGE FUNCTION
# ============================================================================
def merge_tiles(date_str: str, base_path: Path) -> bool:
def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool:
"""Merge downloaded tiles into single GeoTIFF using GDAL."""
single_images_dir = base_path / 'single_images_8b' / date_str
single_images_dir = base_path / collection_info['singles_folder'] / date_str
# Find all response.tiff files
file_list = [str(p) for p in single_images_dir.rglob('response.tiff')]
@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool:
print(f" ✗ No tiles found to merge")
return False
merged_tif_dir = base_path / 'merged_tif_8b'
merged_vrt_dir = base_path / 'merged_virtual_8b'
merged_tif_dir = base_path / collection_info['output_folder']
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
merged_tif_dir.mkdir(parents=True, exist_ok=True)
merged_vrt_dir.mkdir(parents=True, exist_ok=True)
@ -453,9 +580,9 @@ def main():
# Parse arguments
parser = argparse.ArgumentParser(
description='Download Planet 8-band imagery with PU optimization'
description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)'
)
parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)')
parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)')
parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
@ -481,7 +608,7 @@ def main():
date_str = datetime.date.today().strftime('%Y-%m-%d')
print(f"{'='*70}")
print(f"Planet 8-Band Download - PU Optimized")
print(f"Planet Download - Auto-Detecting Collection (PU Optimized)")
print(f"{'='*70}")
print(f"Project: {args.project}")
print(f"Date: {date_str}")
@ -489,7 +616,7 @@ def main():
# Setup SentinelHub
print(f"\nSetting up SentinelHub...")
config, byoc, catalog = setup_config()
config, catalog = setup_config()
print(f"✓ SentinelHub configured")
# Load geometries
@ -504,15 +631,26 @@ def main():
print(f"\n✗ No tiles intersect field geometries. Exiting.")
sys.exit(1)
# Auto-detect collection and get evalscript
byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)
# Get appropriate evalscript
evalscript = EVALSCRIPT_8BAND if collection_info['bands'] == 4 and 'new' not in collection_info.get('note', '') else EVALSCRIPT_8BAND
if '4e56d0cb' not in collection_info['collection_id']:
evalscript = EVALSCRIPT_4BAND_LEGACY
print(f"\n Collection: {collection_info['name']}")
print(f" Output folder: {collection_info['output_folder']}/")
# Check date availability
print(f"\nChecking data availability...")
print(f"\nChecking data availability for {date_str}...")
if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
sys.exit(0)
# Download tiles
print(f"\n{'='*70}")
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution)
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)
if downloaded == 0:
print(f"\n✗ No tiles downloaded. Exiting.")
@ -522,20 +660,20 @@ def main():
if not args.skip_merge:
print(f"\n{'='*70}")
print(f"Merging tiles...")
if merge_tiles(date_str, base_path):
if merge_tiles(date_str, base_path, collection_info):
print(f"✓ Merge complete")
# Cleanup intermediate files
if args.cleanup:
print(f"\nCleaning up intermediate files...")
import shutil
single_images_dir = base_path / 'single_images_8b' / date_str
merged_vrt_dir = base_path / 'merged_virtual_8b'
single_images_dir = base_path / collection_info['singles_folder'] / date_str
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
try:
if single_images_dir.exists():
shutil.rmtree(single_images_dir)
print(f" ✓ Deleted {single_images_dir.name}/{date_str}")
print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}")
# Clean old VRT files
for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
@ -549,7 +687,7 @@ def main():
print(f"\n{'='*70}")
print(f"✓ Done!")
print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}")
print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}")
print(f"{'='*70}")

View file

@ -98,9 +98,9 @@ byoc = DataCollection.define_byoc(
def setup_paths(project):
"""Create and return folder paths."""
BASE_PATH = Path('../laravel_app/storage/app') / project
BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images_8b')
folder_for_merged_tifs = str(BASE_PATH / 'merged_tif_8b')
folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual_8b')
BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images')
folder_for_merged_tifs = str(BASE_PATH / 'merged_tif')
folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual')
geojson_file = Path(BASE_PATH / 'Data' / 'pivot.geojson')
# Create folders if missing

356
r_app/00_common_utils.R Normal file
View file

@ -0,0 +1,356 @@
# ==============================================================================
# # 00_COMMON_UTILS.R
# ==============================================================================
# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
# #
# # PURPOSE:
# # Centralized location for foundational utilities used across multiple scripts.
# # These functions have NO project knowledge, NO client-type dependencies,
# # NO domain-specific logic.
# #
# # USAGE:
# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
# #
# # source(here::here("r_app", "parameters_project.R")) # Config first
# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities
# #
# # FUNCTIONS:
# # 1. safe_log() — Generic logging with [LEVEL] prefix
# # 2. smartcane_debug() — Conditional debug logging
# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages
# # 4. date_list() — Generate date sequences for processing windows
# # 5. load_field_boundaries() — Load field geometries from GeoJSON
# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects
# #
# # DATE FUNCTIONS (now in parameters_project.R):
# # - get_iso_week() — Extract ISO week number from date
# # - get_iso_year() — Extract ISO year from date
# # - get_iso_week_year() — Extract both ISO week and year as list
# # - format_week_label() — Format date as week/year label
# # - load_harvesting_data() — Load harvest schedule from Excel
# #
# ==============================================================================
# # Source parameters first to get shared date utility functions
# source("parameters_project.R")
# #' Safe Logging Function
# #'
# #' Generic logging with [LEVEL] prefix. Works standalone without any framework.
# #' Consistent with SmartCane logging standard.
# #'
# #' @param message The message to log
# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' safe_log("Processing started", "INFO")
# #' safe_log("Check input file", "WARNING")
# #' safe_log("Failed to load data", "ERROR")
# #'
# safe_log <- function(message, level = "INFO") {
# prefix <- sprintf("[%s]", level)
# cat(sprintf("%s %s\n", prefix, message))
# }
# #' SmartCane Debug Logging (Conditional)
# #'
# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
# #' Useful for development/troubleshooting without cluttering normal output.
# #'
# #' @param message The message to log
# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
# #'
# smartcane_debug <- function(message, verbose = FALSE) {
# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
# return(invisible(NULL))
# }
# safe_log(message, level = "DEBUG")
# }
# #' SmartCane Warning Logging
# #'
# #' Logs WARN-level messages. Convenience wrapper around safe_log().
# #'
# #' @param message The message to log
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' smartcane_warn("Check data format before proceeding")
# #'
# smartcane_warn <- function(message) {
# safe_log(message, level = "WARN")
# }
# #' Load Field Boundaries from GeoJSON
# #'
# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
# #' Handles CRS validation and column standardization.
# #'
# #' @param data_dir Directory containing GeoJSON file
# #' @return List with elements:
# #' - field_boundaries_sf: sf (Simple Features) object
# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
# #'
# #' @details
# #' Automatically selects pivot_2.geojson for ESA project during CI extraction,
# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
# #'
# #' @examples
# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
# #' head(boundaries$field_boundaries_sf)
# #'
# load_field_boundaries <- function(data_dir) {
# # Choose field boundaries file based on project and script type
# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
# use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
# if (use_pivot_2) {
# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
# } else {
# field_boundaries_path <- file.path(data_dir, "pivot.geojson")
# }
# if (!file.exists(field_boundaries_path)) {
# stop(paste("Field boundaries file not found at path:", field_boundaries_path))
# }
# tryCatch({
# # Read GeoJSON with explicit CRS handling
# field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
# # Remove OBJECTID column immediately if it exists
# if ("OBJECTID" %in% names(field_boundaries_sf)) {
# field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
# }
# # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.)
# # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.)
# # to prevent S2 geometry validation errors during downstream processing
# field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)
# # Validate and fix CRS if needed
# tryCatch({
# # Simply assign WGS84 if not already set (safe approach)
# if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
# st_crs(field_boundaries_sf) <- 4326
# warning("CRS was missing, assigned WGS84 (EPSG:4326)")
# }
# }, error = function(e) {
# tryCatch({
# st_crs(field_boundaries_sf) <<- 4326
# }, error = function(e2) {
# warning(paste("Could not set CRS:", e2$message))
# })
# })
# # Handle column names - accommodate optional sub_area column
# if ("sub_area" %in% names(field_boundaries_sf)) {
# field_boundaries_sf <- field_boundaries_sf %>%
# dplyr::select(field, sub_field, sub_area) %>%
# sf::st_set_geometry("geometry")
# } else {
# field_boundaries_sf <- field_boundaries_sf %>%
# dplyr::select(field, sub_field) %>%
# sf::st_set_geometry("geometry")
# }
# # Convert to terra vector if possible, otherwise use sf
# field_boundaries <- tryCatch({
# field_boundaries_terra <- terra::vect(field_boundaries_sf)
# crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
# crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
# if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
# terra::crs(field_boundaries_terra) <- "EPSG:4326"
# warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
# }
# field_boundaries_terra
# }, error = function(e) {
# warning(paste("Terra conversion failed, using sf object instead:", e$message))
# field_boundaries_sf
# })
# return(list(
# field_boundaries_sf = field_boundaries_sf,
# field_boundaries = field_boundaries
# ))
# }, error = function(e) {
# cat("[DEBUG] Error in load_field_boundaries:\n")
# cat(" Message:", e$message, "\n")
# cat(" Call:", deparse(e$call), "\n")
# stop(paste("Error loading field boundaries:", e$message))
# })
# }
# #' Generate a Sequence of Dates for Processing
# #'
# #' Creates a date range from start_date to end_date and extracts week/year info.
# #' Used by Scripts 20, 30, 40 to determine data processing windows.
# #'
# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
# #' @param offset Number of days to look back from end_date (e.g., 7 for one week)
# #' @return A list containing:
# #' - week: ISO week number of start_date
# #' - year: ISO year of start_date
# #' - days_filter: Vector of dates in "YYYY-MM-DD" format
# #' - start_date: Start date as Date object
# #' - end_date: End date as Date object
# #'
# #' @details
# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return
# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations,
# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead.
# #'
# #' @examples
# #' dates <- date_list(as.Date("2025-01-15"), offset = 7)
# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
# #'
# #' dates <- date_list("2025-12-31", offset = 14)
# #' # Handles string input and returns 14 days of data
# #'
# date_list <- function(end_date, offset) {
# # Input validation
# if (!lubridate::is.Date(end_date)) {
# end_date <- as.Date(end_date)
# if (is.na(end_date)) {
# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
# }
# }
# offset <- as.numeric(offset)
# if (is.na(offset) || offset < 1) {
# stop("Invalid offset provided. Expected a positive number.")
# }
# # Calculate date range
# offset <- offset - 1 # Adjust offset to include end_date
# start_date <- end_date - lubridate::days(offset)
# # Extract ISO week and year information (from END date for reporting period)
# week <- lubridate::isoweek(end_date)
# year <- lubridate::isoyear(end_date)
# # Generate sequence of dates
# days_filter <- seq(from = start_date, to = end_date, by = "day")
# days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
# # Log the date range
# safe_log(paste("Date range generated from", start_date, "to", end_date))
# return(list(
# "week" = week,
# "year" = year,
# "days_filter" = days_filter,
# "start_date" = start_date,
# "end_date" = end_date
# ))
# }
# ==============================================================================
# #' Repair Invalid GeoJSON Geometries
# #'
# #' Fixes common geometry issues in GeoJSON/sf objects:
# #' - Degenerate vertices (duplicate points)
# #' - Self-intersecting polygons
# #' - Invalid ring orientation
# #' - Empty or NULL geometries
# #'
# #' Uses sf::st_make_valid() with buffer trick as fallback.
# #'
# #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
# #' @return sf object with repaired geometries
# #'
# #' @details
# #' **Why this matters:**
# #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
# #' rings from manual editing or GIS data sources. These cause errors when using
# #' S2 geometry (strict validation) during cropping operations.
# #'
# #' **Repair strategy (priority order):**
# #' 1. Try st_make_valid() - GEOS-based repair (most reliable)
# #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity
# #' 3. Last resort: Silently keep original if repair fails
# #'
# #' @examples
# #' \dontrun{
# #' fields <- st_read("pivot.geojson")
# #' fields_fixed <- repair_geojson_geometries(fields)
# #' cat(paste("Fixed geometries: before=",
# #' nrow(fields[!st_is_valid(fields), ]),
# #' ", after=",
# #' nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
# #' }
# #'
# repair_geojson_geometries <- function(sf_object) {
# if (!inherits(sf_object, "sf")) {
# stop("Input must be an sf (Simple Features) object")
# }
# # Count invalid geometries BEFORE repair
# invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)
# if (invalid_before == 0) {
# safe_log("All geometries already valid - no repair needed", "INFO")
# return(sf_object)
# }
# safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")
# # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS)
# # This handles degenerate vertices, self-intersections, invalid rings while preserving all features
# repaired <- tryCatch({
# # st_make_valid() on entire sf object preserves all features and attributes
# repaired_geom <- sf::st_make_valid(sf_object)
# # Verify we still have the same number of rows
# if (nrow(repaired_geom) != nrow(sf_object)) {
# warning("st_make_valid() changed number of features - attempting row-wise repair")
# # Fallback: Repair row-by-row to maintain original structure
# repaired_geom <- sf_object
# for (i in seq_len(nrow(sf_object))) {
# tryCatch({
# if (!sf::st_is_valid(sf_object[i, ])) {
# repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
# }
# }, error = function(e) {
# safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
# })
# }
# }
# safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
# repaired_geom
# }, error = function(e) {
# safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
# NULL
# })
# # If repair failed, keep original
# if (is.null(repaired)) {
# safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
# "WARNING")
# return(sf_object)
# }
# # Count invalid geometries AFTER repair
# invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
# safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")
# return(repaired)
# }
# ==============================================================================
# # END 00_COMMON_UTILS.R
# ==============================================================================

View file

@ -1,499 +0,0 @@
#' Combined: Create master grid and split TIFFs into tiles
#' ====================================================================
#'
#' Purpose:
#' 1. Check all daily TIFFs for matching extents
#' 2. Create master 5×5 grid covering all TIFFs
#' 3. Split each daily TIFF into 25 tiles using the master grid
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
library(terra)
library(sf)
# ============================================================================
# CONFIGURATION & COMMAND-LINE ARGUMENTS
# ============================================================================
# Parse command-line arguments for date filtering
# Usage: Rscript 10_create_master_grid_and_split_tiffs.R [start_date] [end_date]
args <- commandArgs(trailingOnly = TRUE)
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
# Both arguments are optional; NULL means "no bound on that side" for the
# date filter applied later when listing TIFFs.
start_date <- NULL
end_date <- NULL
if (length(args) >= 1) {
  # as.Date() silently yields NA for unparseable input; no validation here.
  start_date <- as.Date(args[1])
  cat("Filtering: start date =", as.character(start_date), "\n")
}
if (length(args) >= 2) {
  end_date <- as.Date(args[2])
  cat("Filtering: end date =", as.character(end_date), "\n")
}
# Project selection and I/O locations (hard-coded per deployment).
PROJECT <- "angata"
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
GRID_NROWS <- 5
GRID_NCOLS <- 5
# Construct grid-specific subfolder path
# NOTE(review): GRID_SIZE_LABEL is frozen here; the script may later override
# GRID_NROWS/GRID_NCOLS (1x1 for small ROIs) without updating this label, so
# outputs are still stored under the original "5x5" folder/file names.
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
# Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
# ============================================================================
cat("\n[PART 1] Creating Master Grid\n")
# Load field boundaries for overlap checking
cat("\n[1] Checking for existing master grid...\n")
# Check if master grid already exists
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
if (file.exists(MASTER_GRID_PATH)) {
  # A previously saved grid implies filtering already happened on an earlier run.
  cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
  master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
  field_boundaries_sf <- NULL # No need to load pivot.geojson
  field_boundaries_vect <- NULL
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  # No existing grid - need to create one from pivot.geojson
  cat(" No existing grid found. Creating new one from pivot.geojson...\n")
  if (!file.exists(GEOJSON_PATH)) {
    stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
    "Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
    "from the same directory as a previous successful run (grid already exists).")
  }
  field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
  # NOTE(review): field_boundaries_vect does not appear to be used later in
  # this script; only field_boundaries_sf is consulted for filtering.
  field_boundaries_vect <- terra::vect(GEOJSON_PATH)
  cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
}
# Try to find a name column (only if field_boundaries_sf exists)
# Purely informational: prints the field identifiers that were loaded.
if (!is.null(field_boundaries_sf)) {
  field_names <- NA
  if ("name" %in% names(field_boundaries_sf)) {
    field_names <- field_boundaries_sf$name
  } else if ("field" %in% names(field_boundaries_sf)) {
    field_names <- field_boundaries_sf$field
  } else if ("field_name" %in% names(field_boundaries_sf)) {
    field_names <- field_boundaries_sf$field_name
  } else {
    field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
  }
  cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
}
# Helper function: Check if a tile overlaps with any field (simple bbox overlap)
tile_overlaps_fields <- function(tile_extent, field_geoms) {
  # Report whether the tile's bounding box overlaps the bounding box of at
  # least one field geometry. Uses a plain axis-aligned box test (no full
  # geometry intersection). On any error the tile is conservatively kept.
  #
  # tile_extent: bbox-like object with $xmin/$xmax/$ymin/$ymax accessors.
  # field_geoms: sfc geometry column of the field boundaries.
  tryCatch({
    for (idx in seq_len(length(field_geoms))) {
      geom <- field_geoms[idx]
      # Empty geometries cannot overlap anything.
      if (st_is_empty(geom)) {
        next
      }
      bb <- st_bbox(geom)
      # Two axis-aligned boxes are disjoint iff they are separated along
      # the x axis OR along the y axis.
      separated_x <- tile_extent$xmax < bb$xmin || tile_extent$xmin > bb$xmax
      separated_y <- tile_extent$ymax < bb$ymin || tile_extent$ymin > bb$ymax
      if (!separated_x && !separated_y) {
        return(TRUE) # overlap found
      }
    }
    FALSE # no field overlapped this tile
  }, error = function(e) {
    cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
    TRUE # default to including the tile on error
  })
}
cat("\n[2] Checking TIFF extents...\n")
# Daily mosaics are expected to be named YYYY-MM-DD.tif in TIFF_FOLDER.
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)
# Filter by date range if specified
if (!is.null(start_date) || !is.null(end_date)) {
  cat("\nApplying date filter...\n")
  # Dates come from the file name (".tif" stripped); non-date names become NA
  # and drop out of the comparisons below.
  file_dates <- as.Date(sub("\\.tif$", "", tiff_files))
  if (!is.null(start_date) && !is.null(end_date)) {
    keep_idx <- file_dates >= start_date & file_dates <= end_date
    cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
  } else if (!is.null(start_date)) {
    keep_idx <- file_dates >= start_date
    cat(" From: ", as.character(start_date), "\n", sep = "")
  } else {
    keep_idx <- file_dates <= end_date
    cat(" Until: ", as.character(end_date), "\n", sep = "")
  }
  tiff_files <- tiff_files[keep_idx]
  cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
}
if (length(tiff_files) == 0) {
  stop("No TIFF files found in ", TIFF_FOLDER)
}
cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")
# Load all extents - ONE TIME, upfront
# (terra::rast() is lazy, so this presumably reads only headers — cheap per file.)
extents <- list()
for (i in seq_along(tiff_files)) {
  tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
  raster <- terra::rast(tiff_path)
  ext <- terra::ext(raster)
  extents[[i]] <- ext
  # Progress indicator every 50 files
  if (i %% 50 == 0) {
    cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
  }
}
cat(" ✓ All extents loaded\n")
# Check if all extents match
cat("\n[3] Comparing extents...\n")
# Small absolute tolerance absorbs floating-point noise between otherwise
# identical extents.
tolerance <- 1e-8
all_match <- TRUE
first_ext <- extents[[1]]
# BUGFIX: the previous `for (i in 2:length(extents))` produced c(2, 1) when
# only one TIFF was present and indexed extents[[2]] out of bounds.
# seq_len(n - 1) + 1 is empty for n == 1, so a single file trivially matches.
for (i in seq_len(length(extents) - 1L) + 1L) {
  curr_ext <- extents[[i]]
  # Every extent is compared against the FIRST file's extent only.
  match <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
    abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
    abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
    abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )
  if (!match) {
    all_match <- FALSE
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}
if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}
# Create master extent
# Union of all individual extents: min of mins, max of maxes per axis.
cat("\n[4] Creating master extent...\n")
master_xmin <- min(sapply(extents, function(e) e$xmin))
master_xmax <- max(sapply(extents, function(e) e$xmax))
master_ymin <- min(sapply(extents, function(e) e$ymin))
master_ymax <- max(sapply(extents, function(e) e$ymax))
# 111320 m per degree: rough equatorial conversion — assumes EPSG:4326
# coordinates and ignores latitude shrink of longitude degrees (TODO confirm).
x_range_m <- (master_xmax - master_xmin) * 111320
y_range_m <- (master_ymax - master_ymin) * 111320
cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
"Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")
# Auto-determine grid size based on ROI dimensions
# NOTE(review): this overrides GRID_NROWS/GRID_NCOLS but NOT GRID_SIZE_LABEL /
# OUTPUT_FOLDER (computed earlier), so a 1×1 grid is still stored under the
# "5x5" folder and file names.
if (x_range_m < 10000 && y_range_m < 10000) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
} else {
  cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n")
  GRID_NROWS <- 5
  GRID_NCOLS <- 5
}
N_TILES <- GRID_NROWS * GRID_NCOLS
# Check if master grid already exists
# NOTE(review): this repeats the step [1] check on the same path
# (master_grid_file == MASTER_GRID_PATH).
cat("\n[5] Checking if master grid exists...\n")
master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
if (file.exists(master_grid_file)) {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  cat(" Grid does not exist. Creating new master grid...\n")
  # Create 5×5 grid
  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
  master_bbox <- st_bbox(c(
    xmin = master_xmin,
    xmax = master_xmax,
    ymin = master_ymin,
    ymax = master_ymax
  ), crs = 4326)
  bbox_sf <- st_as_sfc(master_bbox)
  # st_make_grid partitions the bbox into GRID_NCOLS × GRID_NROWS rectangles.
  master_grid <- st_make_grid(
    bbox_sf,
    n = c(GRID_NCOLS, GRID_NROWS),
    what = "polygons"
  )
  # tile_id is a zero-padded index ("01".."25") matching grid cell order.
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", 1:length(master_grid)),
    geometry = master_grid
  )
  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")
  # Convert to SpatVector for use in makeTiles
  master_grid_vect <- terra::vect(master_grid_sf)
  # Save master grid
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}
# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
# If grid was loaded from file, it's already filtered. Skip filtering.
# BUGFIX: the previous condition `!file.exists(MASTER_GRID_PATH)` was always
# FALSE at this point when the grid had just been created in step [6], because
# step [6] saves the new grid to that very path — so freshly created grids
# were never filtered. Filtering must run exactly when the field boundaries
# were loaded (i.e. the grid was newly created this run).
if (!is.null(field_boundaries_sf)) {
  cat("\n[7] Filtering master grid to only overlapping tiles...\n")
  # Check which tiles overlap with any field (bbox test via helper above).
  overlapping_tile_indices <- c()
  for (tile_idx in 1:nrow(master_grid_sf)) {
    tile_geom <- master_grid_sf[tile_idx, ]
    if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
      overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
    }
  }
  cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
  cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
  # Create filtered grid with only overlapping tiles; keep original tile ids.
  filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
  filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
  # Persist the FILTERED grid so the next run can load it as "already filtered"
  # (this is the contract the load path above relies on).
  st_write(filtered_grid_sf, MASTER_GRID_PATH, delete_dsn = TRUE, quiet = TRUE)
} else {
  cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
  # Grid was already loaded - it's already filtered
  filtered_grid_sf <- master_grid_sf
}
# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)
cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================
cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")
total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0
for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  date_str <- gsub("\\.tif$", "", tiff_file)
  # Create date-specific output folder
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
  # CHECK: Skip if date already processed (RESUME-SAFE)
  # A date counts as "done" if its folder contains any non-grid .tif file.
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]
    if (length(existing_tiles) > 0) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
      " (", length(existing_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next # Skip this date
    }
  }
  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1
  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)
  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
  # Create date-specific output folder
  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }
  cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")
  # Use makeTiles with FILTERED grid (only overlapping tiles)
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )
  # Rename tiles to [DATE]_[TILE_ID].tif
  # NOTE(review): assumes makeTiles writes tile1.tif..tileN.tif in the same
  # row order as filtered_grid_sf — confirm against terra::makeTiles docs.
  for (tile_idx in seq_along(tiles_list)) {
    source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
    tile_id <- filtered_grid_sf$tile_id[tile_idx]
    final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
    if (file.exists(source_file)) {
      file.rename(source_file, final_file)
    }
  }
  cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + length(tiles_list)
}
# ============================================================================
# VERIFICATION
# ============================================================================
cat("\n[9] Verifying output...\n")
# Count tiles per date folder
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
date_folders <- sort(date_folders[date_folders != "."])
total_tile_files <- 0
for (date_folder in date_folders) {
  # Exclude the master grid artefact from tile counts.
  tiles_in_folder <- list.files(file.path(OUTPUT_FOLDER, date_folder),
  pattern = "\\.tif$")
  tiles_in_folder <- tiles_in_folder[!grepl("master_grid", tiles_in_folder)]
  total_tile_files <- total_tile_files + length(tiles_in_folder)
  cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "")
}
# ============================================================================
# SUMMARY
# ============================================================================
cat("\n\n========== SUMMARY ==========\n")
cat("\nGrid Configuration:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat("\nField Filtering:\n")
cat(" - Field boundaries loaded from pivot.geojson\n")
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
cat(" - Significant storage savings for sparse fields!\n")
cat("\nProcessing Summary:\n")
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}
cat("\nDirectory Structure:\n")
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat(" ├── 2024-01-15/\n")
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
cat(" │ ├── 2024-01-15_05.tif\n")
cat(" │ └── ...\n")
cat(" ├── 2024-01-16/\n")
cat(" │ └── ...\n")
cat(" └── ...\n")
cat("\n⭐ Key Benefits:\n")
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# This metadata file is read by parameters_project.R to determine mosaic mode
# It allows script 40 to know what script 10 decided without re-computing
cat("\n[10] Writing tiling configuration metadata...\n")
config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")
# JSON is assembled by hand; tolower(N_TILES > 1) renders the R logical as
# the JSON literal "true"/"false".
config_json <- paste0(
  '{\n',
  ' "project": "', PROJECT, '",\n',
  ' "has_tiles": ', tolower(N_TILES > 1), ',\n',
  ' "grid_size": "', GRID_SIZE_LABEL, '",\n',
  ' "grid_rows": ', GRID_NROWS, ',\n',
  ' "grid_cols": ', GRID_NCOLS, ',\n',
  ' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n',
  ' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n',
  ' "created_date": "', Sys.Date(), '",\n',
  ' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n',
  '}\n'
)
writeLines(config_json, config_file)
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")
cat("\n✓ Script complete!\n")

View file

@ -0,0 +1,165 @@
# ============================================================================
# SCRIPT 10: Create Per-Field TIFFs (Data Organization & Splitting)
# ============================================================================
# PURPOSE:
# Split full-farm satellite TIFFs into per-field file structure. Supports
# two phases: legacy data migration and ongoing new downloads. Transforms
# single large-file architecture into per-field directory structure for
# clean aggregation in downstream scripts (Script 20, 40, 80/90).
#
# INPUT DATA:
# - Source: laravel_app/storage/app/{project}/merged_tif/ or merged_final_tif/
# - Format: GeoTIFF (4-band RGB+NIR or 5-band with CI)
# - Naming: {YYYY-MM-DD}.tif (full farm mosaic)
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/field_tiles/
# - Format: GeoTIFF (4-band RGB+NIR, same as input)
# - Structure: field_tiles/{FIELD}/{YYYY-MM-DD}.tif
# - Naming: Per-field GeoTIFFs organized by field and date
#
# USAGE:
# Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
# - end_date: End date for processing (YYYY-MM-DD format, default: today)
# - offset: Days to look back (numeric, default: 7)
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - primary data organization script
# - agronomic_support (AURA): Yes - supports field-level analysis
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse
# - Utils files: parameters_project.R, 00_common_utils.R, 10_create_per_field_tiffs_utils.R
# - External data: Field boundaries (pivot.geojson)
# - Data directories: merged_tif/, field_tiles/ (created if missing)
#
# NOTES:
# - Supports two-phase migration: legacy (merged_final_tif) and ongoing (merged_tif)
# - Automatically detects and handles field boundaries from pivot.geojson
# - Geometry validation and repair applied via st_make_valid()
# - Critical for downstream Scripts 20, 40, and KPI calculations
# - Creates per-field structure that enables efficient per-field processing
#
# RELATED ISSUES:
# SC-111: Script 10 refactoring and geometry repair
# SC-112: Utilities restructuring (uses 00_common_utils.R)
#
# ============================================================================
# Spatial data handling
suppressPackageStartupMessages({
library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries)
library(sf) # For spatial operations (reading field boundaries GeoJSON, masking)
library(here) # For relative path resolution
})
# ==============================================================================
# MAIN PROCESSING FUNCTION
# ==============================================================================
main <- function() {
  # Entry point for Script 10: split full-farm merged TIFFs into per-field
  # GeoTIFFs under field_tiles/{FIELD}/{DATE}.tif.
  # Args (positional): [project] [end_date YYYY-MM-DD] [offset days].
  # Exits the R session with status 0 on success (see quit() at the end).

  # STEP 1: Set working directory to project root (smartcane/)
  # This ensures all relative paths resolve correctly
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }
  # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
  args <- commandArgs(trailingOnly = TRUE)
  # Parse arguments: [project] [end_date] [offset]
  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date()
  offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
  # Make variables available to sourced files (they execute in global scope)
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("end_date", end_date_arg, envir = .GlobalEnv)
  assign("offset", offset_arg, envir = .GlobalEnv)
  # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
  # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })
  # Load Script 10-specific utilities
  tryCatch({
    source("r_app/10_create_per_field_tiffs_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message))
    stop(e)
  })
  # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign())
  # These control which dates Script 10 processes from merged_tif/
  # Window: end_date - offset days to end_date
  # Always coerce to correct types to avoid issues with lingering/inherited values
  # NOTE(review): end_date/offset were assigned to .GlobalEnv in STEP 2, so these
  # fallbacks only fire if a sourced file replaced them with the wrong type.
  if (!exists("end_date") || !inherits(end_date, "Date")) {
    end_date <- Sys.Date()
    safe_log(paste("Using default end_date:", end_date), "INFO")
  }
  if (!exists("offset") || !is.numeric(offset)) {
    offset <- 7
    safe_log(paste("Using default offset:", offset, "days"), "INFO")
  }
  # Ensure offset is numeric (in case it came in as a character string from environment)
  # NOTE(review): likely unreachable — a character offset already failed the
  # is.numeric() check above and was reset to 7.
  if (is.character(offset)) {
    offset <- as.numeric(offset)
  }
  # Calculate date window for processing
  start_date <- end_date - offset
  date_window <- seq(start_date, end_date, by = "day")
  date_window_str <- format(date_window, "%Y-%m-%d")
  safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")
  # STEP 5: Load centralized path structure (creates all directories automatically)
  paths <- setup_project_directories(project_dir)
  safe_log(paste("Project:", project_dir))
  safe_log(paste("Base path:", paths$laravel_storage_dir))
  safe_log(paste("Data dir:", paths$data_dir))
  # Load field boundaries using data_dir (not field_boundaries_path)
  # load_field_boundaries() expects a directory and builds the file path internally
  fields_data <- load_field_boundaries(paths$data_dir)
  fields <- fields_data$field_boundaries_sf
  # Define input and output directories (from centralized paths)
  merged_tif_dir <- paths$merged_tif_folder
  field_tiles_dir <- paths$field_tiles_dir
  field_tiles_ci_dir <- paths$field_tiles_ci_dir
  # PHASE 1: Process new downloads (always runs)
  # Pass field_tiles_ci_dir so it can skip dates already migrated
  # Also pass end_date and offset so only dates in window are processed
  process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
  end_date = end_date, offset = offset)
  safe_log("\n========================================", "INFO")
  safe_log("FINAL SUMMARY", "INFO")
  safe_log("========================================", "INFO")
  safe_log(paste("Processing: created =", process_result$total_created,
  ", skipped =", process_result$total_skipped,
  ", errors =", process_result$total_errors), "INFO")
  safe_log("Script 10 complete", "INFO")
  safe_log("========================================\n", "INFO")
  # Terminates the R session; nothing after this line ever runs.
  quit(status = 0)
}
# Execute main if called from command line
# sys.nframe() == 0 only at top level (e.g. when run via Rscript), so sourcing
# this file from another script does not trigger execution.
if (sys.nframe() == 0) {
  main()
}

View file

@ -0,0 +1,306 @@
# ==============================================================================
# 10_CREATE_PER_FIELD_TIFFS_UTILS.R
# ==============================================================================
# UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation
#
# PURPOSE:
# Extracted helper functions specific to Script 10 (per-field TIFF splitting).
# These functions are domain-specific (not generic) and handle raster cropping
# and batch processing of satellite imagery for per-field storage.
#
# FUNCTIONS:
# 1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries
# 2. process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure
#
# DEPENDENCIES:
# - terra (raster operations)
# - sf (geometry operations)
# - 00_common_utils.R (logging via safe_log())
#
# USAGE:
# source(here::here("r_app", "10_create_per_field_tiffs_utils.R"))
#
# ==============================================================================
library(terra)
library(sf)
# ==============================================================================
#' Crop Single TIFF to Field Boundaries
#'
#' Loads a single satellite TIFF, finds overlapping field boundaries, and writes
#' cropped per-field versions to output directory. Handles CRS reprojection and
#' idempotent file creation (skips existing files).
#'
#' @param tif_path Character. Absolute path to source TIFF file (4-band or 5-band).
#' @param tif_date Character. Date string (YYYY-MM-DD) for naming output files.
#' @param fields sf object. GeoDataFrame of field boundaries with a 'field' column
#'   (used to name the per-field output subdirectories).
#' @param output_base_dir Character. Base directory where per-field TIFFs are stored
#' (e.g., "field_tiles" or "field_tiles_CI").
#'
#' @return List with elements:
#' - created: Integer. Number of field TIFFs created
#' - skipped: Integer. Number of files that already existed
#' - errors: Integer. Number of fields that failed to crop
#'
#' @details
#' **Output Structure:**
#' Creates files at: {output_base_dir}/{field_name}/{tif_date}.tif
#'
#' **CRS Handling:**
#' Automatically reprojects field boundaries to match raster CRS before cropping.
#'
#' **Idempotency:**
#' If output file already exists, skips writing (counted as "skipped").
#' Safe to re-run without duplicate file creation.
#'
#' @examples
#' \dontrun{
#' fields <- load_field_boundaries("path/to/pivot.geojson")
#' result <- crop_tiff_to_fields(
#' tif_path = "merged_tif/2024-01-15.tif",
#' tif_date = "2024-01-15",
#' fields = fields,
#' output_base_dir = "field_tiles"
#' )
#' cat(sprintf("Created: %d, Skipped: %d, Errors: %d\n",
#' result$created, result$skipped, result$errors))
#' }
#'
crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
  # Crop one merged satellite TIFF to every overlapping field boundary and
  # write {output_base_dir}/{field}/{tif_date}.tif. Existing outputs are
  # skipped (idempotent). Returns list(created, skipped, errors).
  created <- 0
  skipped <- 0
  errors <- 0
  # Load raster
  if (!file.exists(tif_path)) {
    safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR")
    return(list(created = 0, skipped = 0, errors = 1))
  }
  # FIX: do not name the loaded raster `rast` — the old code masked
  # terra::rast() for the remainder of the function body.
  src_rast <- tryCatch({
    rast(tif_path)
  }, error = function(e) {
    safe_log(paste("ERROR loading raster:", e$message), "ERROR")
    NULL
  })
  if (is.null(src_rast)) {
    return(list(created = 0, skipped = 0, errors = 1))
  }
  # Create raster bounding box (as an sfc polygon) in the raster's CRS.
  rast_bbox <- st_as_sfc(st_bbox(src_rast))
  st_crs(rast_bbox) <- st_crs(src_rast)
  # Reproject fields to match raster CRS before intersecting.
  fields_reprojected <- st_transform(fields, st_crs(rast_bbox))
  # Find which fields intersect this raster (CRITICAL: raster bbox first, then fields)
  overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE)
  overlapping_indices <- unique(unlist(overlapping_indices))
  if (length(overlapping_indices) == 0) {
    safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO")
    return(list(created = 0, skipped = 0, errors = 0))
  }
  # Process each overlapping field
  for (field_idx in overlapping_indices) {
    # NOTE(review): output folders are named from the 'field' column, while the
    # roxygen docs mention 'field_name' — confirm which column the boundaries carry.
    field_name <- fields$field[field_idx]
    field_geom <- fields_reprojected[field_idx, ]
    # Create field directory
    field_dir <- file.path(output_base_dir, field_name)
    if (!dir.exists(field_dir)) {
      dir.create(field_dir, recursive = TRUE, showWarnings = FALSE)
    }
    # Output file path
    output_path <- file.path(field_dir, paste0(tif_date, ".tif"))
    # Check if file already exists (idempotent)
    if (file.exists(output_path)) {
      skipped <- skipped + 1
      next
    }
    # Crop raster to field boundary. A plain success flag replaces the old
    # `errors <<-` superassignment inside the error handler.
    ok <- tryCatch({
      field_rast <- crop(src_rast, field_geom)
      writeRaster(field_rast, output_path, overwrite = TRUE)
      TRUE
    }, error = function(e) {
      safe_log(paste("ERROR cropping field", field_name, ":", e$message), "ERROR")
      FALSE
    })
    if (ok) {
      created <- created + 1
    } else {
      errors <- errors + 1
    }
  }
  return(list(created = created, skipped = skipped, errors = errors))
}
# ==============================================================================
#' Process New Merged TIFFs into Per-Field Structure
#'
#' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory.
#' Crops each TIFF to field boundaries and stores results in per-field subdirectories.
#' Supports migration mode: skips dates already processed and migrated to field_tiles_CI/.
#'
#' @param merged_tif_dir Character. Source directory containing raw merged TIFFs
#' (e.g., "merged_tif/").
#' @param field_tiles_dir Character. Target directory for per-field TIFFs
#' (e.g., "field_tiles/").
#' @param fields sf object. GeoDataFrame of field boundaries with a 'field' column
#'   (used to name the per-field output subdirectories).
#' @param field_tiles_ci_dir Character. Optional. Directory where migrated CI-calculated
#' TIFFs are stored. If provided, skips dates
#' already processed and moved to field_tiles_CI/.
#' Default: NULL (process all dates).
#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD).
#' Default: NULL (process all available dates).
#' @param offset Integer. Optional. Number of days to look back from end_date.
#' Only used if end_date is also provided.
#' Default: NULL (process all available dates).
#'
#' @return List with elements:
#' - total_created: Integer. Total field TIFFs created across all dates
#' - total_skipped: Integer. Total files that already existed
#' - total_errors: Integer. Total cropping errors across all dates
#'
#' @details
#' **Migration Logic:**
#' When migration phase is active (field_tiles_CI/ contains CI-calculated TIFFs),
#' this function detects which dates have been migrated and skips reprocessing them.
#' This prevents redundant work during the per-field architecture transition.
#'
#' **Date Detection:**
#' Finds all files in merged_tif_dir matching pattern: YYYY-MM-DD.tif
#'
#' **Output Structure:**
#' Creates per-field subdirectories and stores: field_tiles/{FIELD}/{DATE}.tif
#'
#' @examples
#' \dontrun{
#' fields <- load_field_boundaries("path/to/pivot.geojson")
#' result <- process_new_merged_tif(
#' merged_tif_dir = "merged_tif",
#' field_tiles_dir = "field_tiles",
#' fields = fields,
#' field_tiles_ci_dir = "field_tiles_CI"
#' )
#' cat(sprintf("Processing complete: created=%d, skipped=%d, errors=%d\n",
#' result$total_created, result$total_skipped, result$total_errors))
#' }
#'
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL,
                                   end_date = NULL, offset = NULL) {
  # Batch-crop every new merged TIFF (YYYY-MM-DD.tif) into the per-field
  # directory structure field_tiles/{FIELD}/{DATE}.tif, optionally restricted
  # to the [end_date - offset, end_date] window.
  # Returns list(total_created, total_skipped, total_errors).

  # Helper: TRUE when any per-field subdirectory of base_dir already contains
  # {tif_date}.tif. (REFACTOR: this loop was duplicated verbatim for the two
  # skip checks below.)
  date_in_any_field_dir <- function(base_dir, tif_date) {
    field_dirs <- list.dirs(base_dir, full.names = TRUE, recursive = FALSE)
    for (field_dir in field_dirs) {
      if (file.exists(file.path(field_dir, paste0(tif_date, ".tif")))) {
        return(TRUE)
      }
    }
    FALSE
  }

  safe_log("\n========================================", "INFO")
  safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
  safe_log("========================================", "INFO")
  # Check if download directory exists
  if (!dir.exists(merged_tif_dir)) {
    safe_log("No merged_tif/ directory found - no new data to process", "WARNING")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }
  # Create output directory
  if (!dir.exists(field_tiles_dir)) {
    dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE)
  }
  # Find all date-pattern TIFFs in root of merged_tif
  tiff_files <- list.files(
    merged_tif_dir,
    pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$",
    full.names = TRUE
  )
  # FILTER by date window if end_date and offset provided
  if (!is.null(end_date) && !is.null(offset)) {
    start_date <- end_date - offset
    date_range <- seq(start_date, end_date, by = "day")
    date_range_str <- format(date_range, "%Y-%m-%d")
    # Extract dates from filenames and keep only those inside the window.
    tiff_dates <- gsub("\\.tif$", "", basename(tiff_files))
    tiff_files <- tiff_files[tiff_dates %in% date_range_str]
    safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO")
  }
  safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")
  if (length(tiff_files) == 0) {
    safe_log("No new TIFFs found - nothing to process", "WARNING")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }
  # Process each new TIFF
  total_created <- 0
  total_skipped <- 0
  total_errors <- 0
  for (tif_path in tiff_files) {
    tif_date <- gsub("\\.tif$", "", basename(tif_path))
    # CHECK 1: Skip if this date was already migrated to field_tiles_CI/
    # (This means Script 20 already processed it and extracted RDS)
    if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir) &&
        date_in_any_field_dir(field_tiles_ci_dir, tif_date)) {
      safe_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)"), "INFO")
      total_skipped <- total_skipped + 1
      next
    }
    # CHECK 2: Skip if this date already exists in field_tiles/
    # (means this date has already been processed through Script 10)
    if (dir.exists(field_tiles_dir) && date_in_any_field_dir(field_tiles_dir, tif_date)) {
      safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO")
      total_skipped <- total_skipped + 1
      next
    }
    safe_log(paste("Processing:", tif_date), "INFO")
    result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
    total_created <- total_created + result$created
    total_skipped <- total_skipped + result$skipped
    total_errors <- total_errors + result$errors
  }
  safe_log(paste("Processing complete: created =", total_created,
                 ", skipped =", total_skipped, ", errors =", total_errors), "INFO")
  return(list(total_created = total_created, total_skipped = total_skipped,
              total_errors = total_errors))
}

View file

@ -1,202 +0,0 @@
# CI_EXTRACTION.R
# ==============
# This script processes satellite imagery to extract Canopy Index (CI) values for agricultural fields.
# It handles image processing, masking, and extraction of statistics by field/sub-field.
# Supports both 4-band and 8-band PlanetScope data with automatic band detection and cloud masking.
#
# Usage: Rscript 02_ci_extraction.R [end_date] [offset] [project_dir] [data_source]
# - end_date: End date for processing (YYYY-MM-DD format)
# - offset: Number of days to look back from end_date
# - project_dir: Project directory name (e.g., "angata", "aura", "chemba")
# - data_source: Data source directory - "merged_tif_8b" (default) or "merged_tif" (4-band) or "merged_final_tif"
# If tiles exist (daily_tiles_split/), they are used automatically
#
# Examples:
# # Angata 8-band data (with UDM cloud masking)
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
#
# # Aura 4-band data
# Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif
#
# # Auto-detects and uses tiles if available:
# Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(readxl)
library(here)
library(furrr)
library(future)
})
# 2. Process command line arguments
# ------------------------------
# ---------------------------------------------------------------------------
# Main entry point for CI extraction (legacy full-extent or tile-based).
#
# Command-line arguments (all optional, positional):
#   1. end_date     End of the processing window, "YYYY-MM-DD" (default: today)
#   2. offset       Days to look back from end_date (default: 7)
#   3. project_dir  Project name, e.g. "angata", "aura" (default: "angata")
#   4. data_source  Imagery folder: "merged_tif_8b" (default), "merged_tif",
#                   or "merged_final_tif"
#
# Side effects: assigns project_dir, data_source, ci_extraction_script and
# grid_size into the global environment (consumed by parameters_project.R
# and the utility functions), then dispatches to tile-based extraction when
# daily_tiles_split/ exists, else to the legacy full-extent approach.
# ---------------------------------------------------------------------------
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # --- end_date: fall back to the current date on missing/invalid input ---
  if (length(args) >= 1 && !is.na(args[1])) {
    end_date <- as.Date(args[1])
    if (is.na(end_date)) {
      warning("Invalid end_date provided. Using default (current date).")
      end_date <- Sys.Date()
    }
  } else {
    end_date <- Sys.Date()
  }

  # --- offset: number of days to look back (must be a positive number) ---
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      warning("Invalid offset provided. Using default (7 days).")
      offset <- 7
    }
  } else {
    offset <- 7
  }

  # --- project_dir: CLI arg > pre-set global > built-in default ---
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "angata"  # built-in default project
  }

  # --- data_source: which merged-imagery folder to read ---
  if (length(args) >= 4 && !is.na(args[4])) {
    data_source <- as.character(args[4])
    # Warn (but proceed) when an unrecognized source is requested
    if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) {
      warning(paste("Data source", data_source, "not in standard list. Using as-is."))
    }
  } else if (exists("data_source", envir = .GlobalEnv)) {
    data_source <- get("data_source", envir = .GlobalEnv)
  } else {
    data_source <- "merged_tif_8b"  # default to 8-band (newer data with cloud masking)
  }

  # Make project_dir and data_source available globally; parameters_project.R
  # and the utility functions read these from the global environment
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("data_source", data_source, envir = .GlobalEnv)

  cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n",
              project_dir, format(end_date, "%Y-%m-%d"), offset, data_source))

  # Flag so parameters_project.R loads pivot_2.geojson for ESA
  # (extra fields needed for yield prediction)
  ci_extraction_script <- TRUE
  assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)

  # --- Initialize project configuration -----------------------------------
  new_project_question <- TRUE
  cat("[DEBUG] Attempting to source r_app/parameters_project.R\n")
  tryCatch({
    source("r_app/parameters_project.R")
    cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n")
    stop(e)
  })

  cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n")
  tryCatch({
    source("r_app/20_ci_extraction_utils.R")
    cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n")
    stop(e)
  })

  # --- Generate date list for processing ----------------------------------
  # BUG FIX: previously hardcoded 7 days here, silently ignoring the parsed
  # and validated `offset` argument; now the argument is honoured.
  dates <- date_list(end_date, offset)
  log_message(paste("Processing data for week", dates$week, "of", dates$year))

  # --- Find and filter raster files by date (with grid-size detection) ----
  log_message("Searching for raster files")
  # Tiles (Script 01 output) live under daily_tiles_split/<grid>/, e.g. 5x5/
  tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")

  # Detect grid size from subfolder names like "5x5", "10x10", "20x20"
  grid_size <- NA
  if (dir.exists(tiles_split_base)) {
    subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
    if (length(grid_patterns) > 0) {
      grid_size <- grid_patterns[1]  # use the first grid size found
      log_message(paste("Detected grid size:", grid_size))
    }
  }

  # Construct tile folder path with grid size (if one was detected)
  if (!is.na(grid_size)) {
    tile_folder <- file.path(tiles_split_base, grid_size)
  } else {
    tile_folder <- tiles_split_base
  }
  use_tiles <- dir.exists(tile_folder)

  # Make grid_size available globally for other functions
  assign("grid_size", grid_size, envir = .GlobalEnv)

  tryCatch({
    if (use_tiles) {
      # Tile-based processing (preferred when Script 01 produced tiles)
      log_message(paste("Tile folder detected at", tile_folder))
      log_message("Using tile-based CI extraction")
      # NOTE(review): field_boundaries, field_boundaries_sf, daily_CI_vals_dir,
      # cumulative_CI_vals_dir, merged_final and daily_vrt are expected to be
      # defined by parameters_project.R sourced above - confirm for new projects.
      process_ci_values_from_tiles(
        dates = dates,
        tile_folder = tile_folder,
        field_boundaries = field_boundaries,
        field_boundaries_sf = field_boundaries_sf,
        daily_CI_vals_dir = daily_CI_vals_dir,
        cumulative_CI_vals_dir = cumulative_CI_vals_dir,
        merged_final_dir = merged_final,
        grid_size = grid_size
      )
    } else {
      # Legacy full-extent processing
      log_message("No tiles found. Using legacy full-extent approach")
      existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
      log_message(paste("Found", length(existing_files), "raster files for processing"))
      # Process raster files and create VRT
      vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
      # Process and combine CI values
      process_ci_values(dates, field_boundaries, merged_final,
                        field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
    }
  }, error = function(e) {
    log_message(paste("Error in main processing:", e$message), level = "ERROR")
    stop(e$message)
  })
}
# Run main() only when executed as a script (not when source()d).
if (sys.nframe() == 0) main()

View file

@ -0,0 +1,238 @@
# CI_EXTRACTION_PER_FIELD.R
# =========================
# Script 20 (Refactored for Per-Field Architecture)
#
# This script reads per-field TIFFs from Script 10 output and:
# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif
# 3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
#
# Key differences from legacy Script 20:
# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
# - Directly extracts CI statistics per sub_field within each field
#
# Usage:
# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(here)
})
# =============================================================================
# Main Processing
# =============================================================================
# ---------------------------------------------------------------------------
# Main entry point for per-field CI extraction (Script 20, refactored).
#
# Command-line arguments (all optional, positional):
#   1. project_dir  Project name (default: "angata")
#   2. end_date     End of processing window, "YYYY-MM-DD" (default: today)
#   3. offset       Days to look back from end_date (default: 7)
#
# For each date in the window, loads the farm-wide merged 4-band TIFF once,
# appends a CI band, then crops per field, producing:
#   field_tiles_CI/{FIELD}/{DATE}.tif  (5-band: R, G, B, NIR, CI)
#   daily_vals/{FIELD}/{DATE}.rds      (per-field CI statistics)
# ---------------------------------------------------------------------------
main <- function() {
  # STEP 1: ensure the working directory is the project root (smartcane/)
  # so all relative paths below resolve correctly
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: parse command-line arguments FIRST (before sourcing utilities)
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
  offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7

  # parameters_project.R reads project_dir from the global environment
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Load parameters_project.R (provides safe_log, date_list,
  # setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })

  # Load CI extraction utilities
  tryCatch({
    source("r_app/20_ci_extraction_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
    stop(e)
  })

  # STEP 3: all utilities are loaded - proceed with the script logic
  safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
  safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
                   project_dir, format(end_date, "%Y-%m-%d"), offset))

  # Centralized directory paths
  setup <- setup_project_directories(project_dir)

  # Load field boundaries (the tryCatch expression runs in this frame, so the
  # assignment below lands in main()'s environment)
  tryCatch({
    field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
    safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
  }, error = function(e) {
    safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR")
    stop(e)
  })

  # Dates to process: migration mode (explicit list supplied by the pipeline
  # runner via the global `dates_to_process`) or normal offset-day window
  if (exists("dates_to_process") && !is.null(dates_to_process)) {
    dates_filter <- sort(dates_to_process)
    safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
  } else {
    dates <- date_list(end_date, offset)
    dates_filter <- dates$days_filter
    safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
                     dates$start_date, dates$end_date, length(dates_filter)))
  }

  safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
  safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
  safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))

  # Script 10 must have produced per-field tiles before this script can run
  if (!dir.exists(setup$field_tiles_dir)) {
    safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
    stop("Script 10 output not found. Run Script 10 first.")
  }

  fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
  fields <- fields[fields != ""]  # drop empty strings
  if (length(fields) == 0) {
    safe_log("No fields found in field_tiles directory", "WARNING")
    return()
  }
  safe_log(sprintf("Found %d fields to process", length(fields)))

  # DEBUG: show which paths setup_project_directories provided
  safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", ")))
  safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
  safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))

  # Pre-create output subdirectories for all fields
  for (field in fields) {
    dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
    if (!is.null(setup$daily_ci_vals_dir)) {
      dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE)
    }
  }

  # Process each DATE: load the merged farm-wide TIFF once, then extract all
  # fields from it (avoids re-reading the large raster once per field)
  total_success <- 0
  total_error <- 0
  for (date_str in dates_filter) {
    input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
    if (!file.exists(input_tif_merged)) {
      safe_log(sprintf("  %s: merged_tif not found (skipping)", date_str))
      # BUG FIX: was `<<-`, which skips main()'s frame and writes a global,
      # leaving the local counter (reported in the summary) at 0.
      total_error <- total_error + 1
      next
    }
    tryCatch({
      # Load the 4-band TIFF once for this date
      raster_4band <- terra::rast(input_tif_merged)
      safe_log(sprintf("  %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
      # Calculate CI and build the 5-band stack (R, G, B, NIR, CI)
      ci_raster <- calc_ci_from_raster(raster_4band)
      five_band <- c(raster_4band, ci_raster)

      fields_processed_this_date <- 0
      for (field in fields) {
        field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
        field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
        dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
        dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
        output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
        output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))

        # MODE 3: both outputs already exist - nothing to do
        if (file.exists(output_tif) && file.exists(output_rds)) {
          next
        }

        # MODE 2: regeneration - CI TIFF exists but RDS is missing
        if (file.exists(output_tif) && !file.exists(output_rds)) {
          tryCatch({
            extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
            fields_processed_this_date <- fields_processed_this_date + 1
          }, error = function(e) {
            # Continue to next field
          })
          next
        }

        # MODE 1: normal - crop the 5-band stack to the field boundary and save
        tryCatch({
          field_geom <- field_boundaries_sf %>% filter(field == !!field)
          five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
          terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
          # Extract CI statistics by sub_field (5th band is CI)
          ci_cropped <- five_band_cropped[[5]]
          ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
          if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
            saveRDS(ci_stats, output_rds)
          }
          fields_processed_this_date <- fields_processed_this_date + 1
        }, error = function(e) {
          # Error in an individual field - log and continue with the next one
          safe_log(sprintf("    Error processing field %s: %s", field, e$message), "WARNING")
        })
      }

      # Count the date as a success if at least one field was processed
      if (fields_processed_this_date > 0) {
        # BUG FIX: was `<<-`; the tryCatch expression is evaluated in main()'s
        # frame, so plain `<-` updates the local counter correctly.
        total_success <- total_success + 1
        safe_log(sprintf("  %s: Processed %d fields", date_str, fields_processed_this_date))
      }
    }, error = function(e) {
      # The handler runs in its own function scope, so `<<-` correctly
      # reaches main()'s counter here.
      total_error <<- total_error + 1
      safe_log(sprintf("  %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
    })
  }

  # Summary
  safe_log(sprintf("\n=== Processing Complete ==="))
  safe_log(sprintf("Successfully processed: %d", total_success))
  safe_log(sprintf("Errors encountered: %d", total_error))
  if (total_success > 0) {
    safe_log("Output files created in:")
    safe_log(sprintf("  TIFFs: %s", setup$field_tiles_ci_dir))
    safe_log(sprintf("  RDS: %s", setup$daily_ci_vals_dir))
  }
}
# Entry-point guard: run main() only under Rscript, not when source()d.
if (sys.nframe() == 0) main()

View file

@ -6,24 +6,10 @@
#
# Parallel Processing: Tile-based extraction uses furrr::future_map to process
# multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores)
#' Log a message whether or not the project logger is loaded
#'
#' Routes the message through log_message() when that function is in scope;
#' otherwise falls back to base R, emitting warning() for ERROR/WARNING
#' levels and message() for everything else.
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  if (!exists("log_message")) {
    # Fallback path: no project logger available
    if (level %in% c("ERROR", "WARNING")) {
      warning(message)
    } else {
      message(message)
    }
  } else {
    log_message(message, level)
  }
}
#
# Per-Field Functions (Script 20):
# - calc_ci_from_raster(): Calculate CI from 4-band raster (Chlorophyll Index formula: NIR/Green - 1)
# - extract_ci_by_subfield(): Extract per-sub_field CI statistics from raster
#' Generate a sequence of dates for processing
#'
@ -216,9 +202,10 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
names(blue_band) <- "Blue"
names(nir_band) <- "NIR"
# Calculate Canopy Index from Red, Green, NIR
# CI = (NIR - Red) / (NIR + Red) is a common formulation
# But using NIR/Green - 1 is also valid and more sensitive to green vegetation
# Calculate Canopy Index from Green and NIR
# *** CRITICAL: Use CHLOROPHYLL INDEX formula ONLY ***
# CORRECT: CI = NIR / Green - 1 (ranges ~1-7, sensitive to active chlorophyll)
# WRONG: Do NOT use (NIR-Red)/(NIR+Red) - that is NDVI, ranges -1 to 1, different scale
CI <- nir_band / green_band - 1
names(CI) <- "CI"
@ -294,7 +281,6 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
# Write output files
terra::writeRaster(output_raster, new_file, overwrite = TRUE)
terra::vrt(new_file, vrt_file, overwrite = TRUE)
# Check if the result has enough valid pixels
valid_pixels <- terra::global(output_raster$CI, "notNA", na.rm=TRUE)
@ -738,7 +724,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
date = date,
field_boundaries_sf = field_boundaries_sf,
daily_CI_vals_dir = daily_CI_vals_dir,
merged_final_tif_dir = merged_final_dir,
grid_size = grid_size
)
@ -809,7 +794,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
date = date,
field_boundaries_sf = field_boundaries_sf,
daily_CI_vals_dir = daily_CI_vals_dir,
merged_final_tif_dir = merged_final_dir,
grid_size = grid_size
)
@ -850,7 +834,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
#' @return Data frame with field CI statistics for this tile, or NULL if processing failed
#'
process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) {
process_single_tile <- function(tile_file, field_boundaries_sf, date, grid_size = NA) {
tryCatch({
tile_filename <- basename(tile_file)
safe_log(paste(" [TILE] Loading:", tile_filename))
@ -892,25 +876,10 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band)
names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI")
# Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename
# This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/
if (!is.na(grid_size)) {
date_dir <- file.path(merged_final_tif_dir, grid_size, date)
} else {
date_dir <- file.path(merged_final_tif_dir, date)
}
if (!dir.exists(date_dir)) {
dir.create(date_dir, recursive = TRUE, showWarnings = FALSE)
}
# Use same filename as source tile (e.g., 2026-01-02_01.tif)
tile_filename <- basename(tile_file)
output_file <- file.path(date_dir, tile_filename)
# Write processed tile
terra::writeRaster(output_raster, output_file, overwrite = TRUE)
safe_log(paste(" [SAVED TIFF] Output:", file.path(date, basename(output_file)), "(5 bands: Red, Green, Blue, NIR, CI)"))
# NOTE: Do NOT save processed tile - it's an intermediate only
# The purpose is to calculate field-level CI statistics, not to create permanent tile files
# This prevents bloat in merged_final_tif/ directory (would unnecessarily duplicate
# daily_tiles_split data with an extra CI band added)
# Extract statistics per field from CI band
field_bbox <- sf::st_bbox(field_boundaries_sf)
@ -951,7 +920,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
#' @return Data frame with field CI statistics for the date
#'
extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) {
extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, grid_size = NA) {
if (!inherits(field_boundaries_sf, "sf")) {
field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
@ -970,7 +939,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
stats_list <- tryCatch({
furrr::future_map(
tile_files,
~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size),
~ process_single_tile(.x, field_boundaries_sf, date, grid_size = grid_size),
.progress = FALSE,
.options = furrr::furrr_options(seed = TRUE)
)
@ -980,7 +949,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
lapply(
tile_files,
function(tile_file) {
process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size)
process_single_tile(tile_file, field_boundaries_sf, date, grid_size = grid_size)
}
)
})
@ -1013,3 +982,208 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
return(aggregated)
}
# =============================================================================
# Script 20 (Per-Field) Specific Functions
# =============================================================================
#' Calculate Canopy Index (CI) from 4-band raster
#'
#' *** CRITICAL FORMULA: CI = NIR / Green - 1 ***
#' This is the CHLOROPHYLL INDEX formula (ranges ~1-7 for vegetation).
#' NOT NDVI! Do NOT use (NIR-Red)/(NIR+Red) - that produces a -1 to 1 range.
#'
#' Expects band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4)
#'
#' @param raster_obj Loaded raster object with at least 4 bands
#' @return Raster object containing CI values (Chlorophyll Index, ranges ~1-7)
#'
calc_ci_from_raster <- function(raster_obj) {
  # Guard: the standard 4 PlanetScope bands (R, G, B, NIR) must be present
  if (terra::nlyr(raster_obj) < 4) {
    stop("Raster has fewer than 4 bands. Cannot calculate CI.")
  }
  # Band layout assumption: 1 = Red, 2 = Green, 3 = Blue, 4 = NIR
  green_band <- terra::subset(raster_obj, 2)
  nir_band <- terra::subset(raster_obj, 4)
  # Chlorophyll Index = NIR / Green - 1 (sensitive to active chlorophyll).
  # Deliberately NOT NDVI: (NIR-Red)/(NIR+Red) has a different scale (-1..1).
  nir_band / green_band - 1
}
#' Extract CI statistics from a per-field CI raster
#'
#' Per-field TIFFs are already cropped to the field boundary by Script 10,
#' so no polygon masking is done here: every valid pixel in the raster is
#' used. Returns a single-row data frame of summary statistics.
#'
#' @param ci_raster Raster object containing CI values
#' @param field_boundaries_sf SF object with field/sub_field polygons
#'   (accepted for interface compatibility; not consulted - the raster
#'   extent already defines the field)
#' @param field_name Character string of the field name to process
#' @return Data frame with field, sub_field, and CI statistics
#'
extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) {
  # Pull all non-NA pixel values straight from the (pre-cropped) raster
  vals <- terra::values(ci_raster, na.rm = TRUE)
  has_pixels <- length(vals) > 0
  # One row per call; sub_field mirrors field since the TIFF is field-specific
  data.frame(
    field = field_name,
    sub_field = field_name,
    ci_mean = if (has_pixels) mean(vals, na.rm = TRUE) else NA_real_,
    ci_median = if (has_pixels) median(vals, na.rm = TRUE) else NA_real_,
    ci_sd = if (has_pixels) sd(vals, na.rm = TRUE) else NA_real_,
    ci_min = if (has_pixels) min(vals, na.rm = TRUE) else NA_real_,
    ci_max = if (has_pixels) max(vals, na.rm = TRUE) else NA_real_,
    ci_count = if (has_pixels) length(vals) else 0,
    stringsAsFactors = FALSE
  )
}
#' Extract RDS from existing CI TIFF (Migration/Regeneration Mode)
#'
#' Pulls CI statistics out of a previously-written 5-band TIFF (CI stored as
#' the 5th band), avoiding recalculation from raw 4-band imagery. Used when
#' field_tiles_CI/ exists but the matching daily_vals/{FIELD}/ RDS is missing.
#'
#' @param ci_tiff_path Path to the 5-band TIFF containing CI as the 5th band
#' @param output_rds_path Path where to save the output RDS file
#' @param field_boundaries_sf SF object with field/sub_field polygons
#' @param field_name Name of the field to extract
#' @return The RDS data frame (invisibly) and saves to disk
#'
extract_rds_from_ci_tiff <- function(ci_tiff_path, output_rds_path, field_boundaries_sf, field_name) {
  tryCatch({
    # Band layout: [1]=R, [2]=G, [3]=B, [4]=NIR, [5]=CI
    band_stack <- terra::rast(ci_tiff_path)
    stats_df <- extract_ci_by_subfield(band_stack[[5]], field_boundaries_sf, field_name)
    # Guard clause: nothing to persist if extraction produced no rows
    if (is.null(stats_df) || nrow(stats_df) == 0) {
      safe_log(sprintf("No CI statistics extracted from %s", ci_tiff_path), "WARNING")
      return(invisible(NULL))
    }
    saveRDS(stats_df, output_rds_path)
    return(invisible(stats_df))
  }, error = function(e) {
    safe_log(sprintf("Error extracting RDS from CI TIFF: %s", e$message), "ERROR")
    return(invisible(NULL))
  })
}
#' Regenerate ALL missing RDS files from existing CI TIFFs (Comprehensive Migration Mode)
#'
#' Processes ALL dates in field_tiles_CI/ and extracts an RDS statistics file
#' for every date missing one under daily_vals/. No date-window filtering -
#' the entire historical archive is scanned.
#'
#' Used for one-time migration of old projects where field_tiles_CI/ is
#' populated but daily_vals/ RDS files are missing or incomplete.
#'
#' @param field_tiles_ci_dir Path to field_tiles_CI/ (input: pre-calculated CI TIFFs)
#' @param daily_vals_dir Path to daily_vals/ (output: RDS statistics files)
#' @param field_boundaries_sf SF object with field/sub_field polygons
#' @param fields Vector of field names to process
#'
#' @return List with summary stats: list(total_processed=N, total_skipped=M, total_errors=K)
#'
regenerate_all_missing_rds <- function(field_tiles_ci_dir, daily_vals_dir, field_boundaries_sf, fields) {
  safe_log("\n========================================")
  safe_log("MIGRATION MODE: REGENERATING ALL MISSING RDS")
  safe_log("Processing ALL dates in field_tiles_CI/")
  safe_log("========================================")

  total_processed <- 0
  total_skipped <- 0
  total_errors <- 0

  # Iterate through each field
  for (field in fields) {
    field_ci_path <- file.path(field_tiles_ci_dir, field)
    field_daily_vals_path <- file.path(daily_vals_dir, field)

    # Skip fields that were never migrated (no CI TIFF directory)
    if (!dir.exists(field_ci_path)) {
      safe_log(sprintf("  Field %s: field_tiles_CI not found (skipping)", field))
      # BUG FIX: was `continue`, which is not an R keyword and would raise
      # "object 'continue' not found" at runtime; R's loop-skip is `next`.
      next
    }

    # Ensure the RDS output directory exists
    dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)

    # Find ALL CI TIFFs for this field, named YYYY-MM-DD.tif (no date filter)
    ci_tiff_files <- list.files(
      path = field_ci_path,
      pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$",
      full.names = TRUE
    )
    if (length(ci_tiff_files) == 0) {
      safe_log(sprintf("  Field %s: No CI TIFFs found (skipping)", field))
      next
    }
    safe_log(sprintf("  Field %s: Found %d CI TIFFs to process", field, length(ci_tiff_files)))

    # Process each CI TIFF for this field
    for (ci_tiff in ci_tiff_files) {
      date_str <- tools::file_path_sans_ext(basename(ci_tiff))
      output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))

      # Skip dates whose RDS already exists
      if (file.exists(output_rds)) {
        total_skipped <- total_skipped + 1
        next
      }

      # Extract RDS from the CI TIFF; an error affects only this one date
      tryCatch({
        extract_rds_from_ci_tiff(ci_tiff, output_rds, field_boundaries_sf, field)
        safe_log(sprintf("    %s: ✓ RDS extracted", date_str))
        total_processed <- total_processed + 1
      }, error = function(e) {
        safe_log(sprintf("    %s: ✗ Error - %s", date_str, e$message), "ERROR")
        # Handler runs in its own scope; `<<-` reaches this function's counter
        total_errors <<- total_errors + 1
      })
    }
  }

  safe_log(sprintf("\nMigration complete: processed %d, skipped %d, errors %d",
                   total_processed, total_skipped, total_errors))
  return(list(
    total_processed = total_processed,
    total_skipped = total_skipped,
    total_errors = total_errors
  ))
}

View file

@ -1,22 +1,62 @@
# 02b_CONVERT_CI_RDS_TO_CSV.R
# ============================
# Convert combined_CI_data.rds (output of script 02) to CSV format for Python
# This script runs AFTER script 02 (CI extraction) and creates a CSV that Python
# can use for harvest date detection WITHOUT requiring the 'model' column (which
# comes from script 03 after interpolation and harvest dates are known).
# ============================================================================
# SCRIPT 21: Convert CI RDS to CSV (Python Compatibility Format)
# ============================================================================
# PURPOSE:
# Convert consolidated CI data from R's wide format (RDS) to Python-compatible
# long format (CSV). Prepares per-field CI time series for harvest detection
# models and Python ML workflows without requiring interpolated/modeled values.
#
# Usage: Rscript 02b_convert_ci_rds_to_csv.R [project_dir]
# - project_dir: Project directory name (e.g., "esa", "chemba", "angata")
# INPUT DATA:
# - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
# - Format: RDS (interpolated growth model data from Script 30)
# - Requirement: Script 30 must have completed growth model interpolation
#
# Output: CSV file at laravel_app/storage/app/{project_dir}/Data/extracted_ci/cumulative_vals/ci_data_for_python.csv
# Columns: field, sub_field, Date, FitData, DOY, value (alias for FitData)
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
# - Format: CSV (long format)
# - Columns: field, sub_field, Date, FitData, DOY, value
#
# USAGE:
# Rscript 21_convert_ci_rds_to_csv.R [project]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - data export
# - agronomic_support (AURA): Yes - Python ML integration
#
# DEPENDENCIES:
# - Packages: tidyverse, lubridate, zoo
# - Utils files: parameters_project.R
# - Input data: All_pivots_Cumulative_CI_quadrant_year_v2.rds from Script 30 (growth model)
# - Data directories: extracted_ci/cumulative_vals/
#
# NOTES:
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
# - Handles both wide format and long format inputs from growth model
# - DOY (Day of Year): Calculated from date for seasonal analysis
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
# - Used by: Python harvest detection models (harvest_date_prediction.py)
# - Exports complete growth curves with interpolated values for ML training
#
# RELATED ISSUES:
# SC-112: Utilities restructuring
# SC-108: Core pipeline improvements
#
# ============================================================================
suppressPackageStartupMessages({
library(tidyverse)
library(lubridate)
library(zoo)
library(here)
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Data manipulation
library(tidyverse) # For dplyr, readr (data wrangling and CSV I/O)
library(lubridate) # For date/time operations (DOY calculation)
library(zoo) # For zoo objects (gap filling, rolling operations)
})
# ============================================================================
@ -140,7 +180,7 @@ main <- function() {
cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir))
# Initialize project configuration
# Initialize project configuration and centralized paths
tryCatch({
source("parameters_project.R")
}, error = function(e) {
@ -152,49 +192,63 @@ main <- function() {
})
})
# Define paths
ci_data_source_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
ci_data_output_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "ci_data_for_python")
# Load centralized path structure (creates all directories automatically)
paths <- setup_project_directories(project_dir)
# Create output directory if it doesn't exist (for new projects)
if (!dir.exists(ci_data_output_dir)) {
dir.create(ci_data_output_dir, recursive = TRUE, showWarnings = FALSE)
cat(sprintf("✓ Created output directory: %s\n", ci_data_output_dir))
}
# Use centralized paths (no need for dir.create - already handled)
ci_data_source_dir <- paths$cumulative_ci_vals_dir
ci_data_output_dir <- paths$ci_for_python_dir
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds")
# Try to load interpolated growth model data from Script 30
input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
# Check if input file exists
if (!file.exists(input_file)) {
stop(paste("Input file not found:", input_file))
stop(paste("Input file not found:", input_file,
"\nScript 30 (growth model) must be run before Script 21."))
}
cat(sprintf("Loading: %s\n", input_file))
# Load RDS file
ci_data_wide <- readRDS(input_file) %>%
# Load RDS file (from Script 30 - already in long format with interpolated values)
ci_data <- readRDS(input_file) %>%
as_tibble()
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide)))
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n"))
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
# Step 1: Convert from WIDE to LONG format
cat("\nStep 1: Converting from wide to long format...\n")
ci_data_long <- wide_to_long_ci_data(ci_data_wide)
# Check format and prepare for export
# If it's already in long format (from Script 30), use as-is
# Otherwise, convert from wide to long
if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) {
cat(" Detected: LONG format (from growth model)\n")
ci_data_long <- ci_data
} else {
cat(" Detected: WIDE format - converting to long...\n")
ci_data_long <- wide_to_long_ci_data(ci_data)
}
# Step 2: Create complete daily sequences with interpolation
# Step 1: Ensure Date column exists and is properly formatted
ci_data_long <- ci_data_long %>%
mutate(
Date = as.Date(Date)
)
# Step 2: If interpolated values already present, use them; otherwise create interpolated sequences
if ("value" %in% names(ci_data_long)) {
# Already has interpolated values from Script 30
cat("\nStep 2: Using interpolated values from growth model...\n")
ci_data_python <- ci_data_long
} else {
# Create interpolated daily sequences
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
# Step 3: Validate output
cat("\nStep 3: Validating output...")
validate_conversion_output(ci_data_python)
}
# Step 4: Save to CSV
cat(sprintf("\nStep 4: Saving to CSV...\n"))
cat(sprintf(" Output: %s\n", output_file))
cat(sprintf("\nStep 4: Saving to CSV...\\n"))
cat(sprintf(" Output: %s\\n", output_file))
write_csv(ci_data_python, output_file)
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))

View file

@ -4,57 +4,143 @@
# ===================
# Utility functions for growth model interpolation and manipulation.
# These functions support the creation of continuous growth models from point measurements.
#
# PERFORMANCE OPTIMIZATION:
# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr()
# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields)
# - Dynamic CPU detection: Allocates workers based on available cores
# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support
#' Safe logging function that works whether log_message exists or not
#' Load and prepare the combined CI data (Per-Field Architecture)
#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges)
#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr()
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup)
#' @return Long-format dataframe with CI values by date and field
#'
safe_log <- function(message, level = "INFO") {
if (exists("log_message")) {
log_message(message, level)
load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
if (!dir.exists(daily_vals_dir)) {
stop(paste("Daily values directory not found:", daily_vals_dir))
}
safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
# OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates
date_filter_min <- NULL
date_filter_max <- NULL
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE)
date_filter_max <- max(harvesting_data$season_end, na.rm = TRUE)
safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s",
format(date_filter_min, "%Y-%m-%d"),
format(date_filter_max, "%Y-%m-%d")))
}
# Find all daily RDS files recursively (per-field structure)
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
all_daily_files <- list.files(
path = daily_vals_dir,
pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$", # Only YYYY-MM-DD.rds format
full.names = TRUE,
recursive = TRUE
)
# Further filter: only keep files that are in a subdirectory (per-field structure)
# Exclude legacy files at the root level like "extracted_2024-02-29_whole_field.rds"
all_daily_files <- all_daily_files[basename(dirname(all_daily_files)) != "daily_vals"]
if (length(all_daily_files) == 0) {
stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
}
safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files)))
# OPTIMIZATION: Filter files by filename date BEFORE parallel loading
# Skip files outside harvest season (can save 60-80% of I/O on large datasets)
if (!is.null(date_filter_min) && !is.null(date_filter_max)) {
all_daily_files <- all_daily_files[
{
dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d")
!is.na(dates) & dates >= date_filter_min & dates <= date_filter_max
}
]
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
}
# Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
# Automatically detect available cores and limit to reasonable number
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
future::plan(strategy = future::multisession, workers = n_cores)
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
# Parallel file reading: future_map_dfr processes each file in parallel
# Returns combined dataframe directly (no need to rbind)
combined_long <- furrr::future_map_dfr(
all_daily_files,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(file) {
# Extract date from filename: {YYYY-MM-DD}.rds
filename <- basename(file)
date_str <- tools::file_path_sans_ext(filename)
# Parse date
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
} else {
if (level %in% c("ERROR", "WARNING")) {
warning(message)
} else {
message(message)
}
}
return(data.frame()) # Return empty dataframe if parse fails
}
#' Load and prepare the combined CI data
#'
#' @param data_dir Directory containing the combined CI data
#' @return Long-format dataframe with CI values by date
#'
load_combined_ci_data <- function(data_dir) {
file_path <- here::here(data_dir, "combined_CI_data.rds")
if (!file.exists(file_path)) {
stop(paste("Combined CI data file not found:", file_path))
if (is.na(parsed_date)) {
return(data.frame())
}
safe_log(paste("Loading CI data from:", file_path))
# Read RDS file
tryCatch({
rds_data <- readRDS(file)
# Load and transform the data to long format
pivot_stats <- readRDS(file_path) %>%
dplyr::ungroup() %>%
dplyr::group_by(field, sub_field) %>%
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
if (is.null(rds_data) || nrow(rds_data) == 0) {
return(data.frame())
}
pivot_stats_long <- pivot_stats %>%
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
dplyr::mutate(
Date = lubridate::ymd(Date),
value = as.numeric(value)
) %>%
tidyr::drop_na(c("value", "Date")) %>%
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
dplyr::filter(!is.infinite(value)) %>%
dplyr::distinct()
# Add date column to the data
rds_data %>%
dplyr::mutate(Date = parsed_date)
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
}, error = function(e) {
return(data.frame()) # Return empty dataframe on error
})
}
)
# Return to sequential processing to avoid nested parallelism
future::plan(future::sequential)
if (nrow(combined_long) == 0) {
safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
return(data.frame())
}
# OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets)
# Reshape to long format using ci_mean as the main CI value
DT <- data.table::as.data.table(combined_long)
DT <- DT[, .(field, sub_field, ci_mean, Date)]
DT[, c("value") := list(as.numeric(ci_mean))]
DT[, ci_mean := NULL]
# Fast filtering without .distinct() (which is slow on large datasets)
# Keep rows where Date is valid, field/sub_field exist, and value is finite
DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)]
# Convert back to tibble for compatibility with rest of pipeline
pivot_stats_long <- dplyr::as_tibble(DT)
safe_log(sprintf("Loaded %d CI data points from %d daily files",
nrow(pivot_stats_long), length(all_daily_files)))
return(pivot_stats_long)
}
@ -65,15 +151,16 @@ load_combined_ci_data <- function(data_dir) {
#' @param harvesting_data Dataframe with harvesting information
#' @param field_CI_data Dataframe with CI measurements
#' @param season Year of the growing season
#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration.
#' @return Dataframe with interpolated daily CI values
#'
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) {
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) {
# Filter harvesting data for the given season and field name
filtered_harvesting_data <- harvesting_data %>%
dplyr::filter(year == season, sub_field == field_name)
if (nrow(filtered_harvesting_data) == 0) {
safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
return(data.frame())
}
@ -83,7 +170,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
# Return an empty data frame if no CI data is found
if (nrow(filtered_field_CI_data) == 0) {
safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
return(data.frame())
}
@ -107,12 +194,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
# If CI is empty after filtering, return an empty dataframe
if (nrow(CI) == 0) {
if (verbose) {
safe_log(paste0("No CI data within season dates for field: ", field_name,
" (Season: ", season, ", dates: ",
format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"). Available CI data range: ", ci_date_range),
"WARNING")
}
return(data.frame())
}
@ -125,25 +214,23 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
subField = field_name
)
# Log successful interpolation
safe_log(paste0("Successfully interpolated CI data for field: ", field_name,
" (Season: ", season, ", dates: ",
format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"). ", nrow(CI), " data points created."))
# Return data with success status
return(CI)
}, error = function(e) {
# Return empty dataframe on error (will be tracked separately)
if (verbose) {
safe_log(paste0("Error interpolating CI data for field ", field_name,
" in season ", season,
" (", format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"): ", e$message), "ERROR")
}
return(data.frame())
})
}
#' Generate interpolated CI data for all fields and seasons
#' PARALLELIZE: Processes fields in parallel using furrr::future_map_df()
#'
#' @param years Vector of years to process
#' @param harvesting_data Dataframe with harvesting information
@ -153,17 +240,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
safe_log("Starting CI data interpolation for all fields")
# Track failed fields for end-of-run summary
failed_fields <- list()
total_fields <- 0
successful_fields <- 0
# Process each year
result <- purrr::map_df(years, function(yr) {
safe_log(paste("Processing year:", yr))
# Get the fields harvested in this year with valid season start dates
sub_fields <- harvesting_data %>%
dplyr::filter(year == yr, !is.na(season_start)) %>%
dplyr::pull(sub_field)
if (length(sub_fields) == 0) {
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
return(data.frame())
}
@ -172,24 +261,74 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
purrr::keep(~ any(ci_data$sub_field == .x))
if (length(valid_sub_fields) == 0) {
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
return(data.frame())
}
# Extract and interpolate data for each valid field
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
total_fields <<- total_fields + length(valid_sub_fields)
safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
# Set up parallel future plan for field interpolation
# Allocate 1 core per ~100 fields (with minimum 2 cores)
n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
future::plan(strategy = future::multisession, workers = n_cores)
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
result_list <- furrr::future_map(
valid_sub_fields,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(field) {
# Call with verbose=FALSE to suppress warnings during parallel iteration
extract_CI_data(field,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr)) %>%
purrr::list_rbind()
season = yr,
verbose = FALSE)
}
)
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
return(result)
# Return to sequential processing
future::plan(future::sequential)
# Process results and tracking
for (i in seq_along(result_list)) {
field_result <- result_list[[i]]
field_name <- valid_sub_fields[i]
if (nrow(field_result) > 0) {
successful_fields <<- successful_fields + 1
} else {
failed_fields[[length(failed_fields) + 1]] <<- list(
field = field_name,
season = yr,
reason = "Unable to generate interpolated data"
)
}
}
# Combine all results for this year
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
if (length(result_list) > 0) {
purrr::list_rbind(result_list)
} else {
data.frame()
}
})
safe_log(paste("Total interpolated data points:", nrow(result)))
# Print summary
safe_log(sprintf("\n=== Interpolation Summary ==="))
safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields))
if (length(failed_fields) > 0) {
safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields)))
for (failure in failed_fields) {
safe_log(sprintf(" - Field %s (Season %d): %s",
failure$field, failure$season, failure$reason), "WARNING")
}
}
safe_log(sprintf("Total interpolated data points: %d", nrow(result)))
return(result)
}
@ -222,11 +361,19 @@ calculate_growth_metrics <- function(interpolated_data) {
#' @return Path to the saved file
#'
save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") {
# Validate input
if (is.null(output_dir) || !is.character(output_dir) || length(output_dir) == 0) {
stop("output_dir must be a non-empty character string")
}
# Normalize path separators for Windows compatibility
output_dir <- normalizePath(output_dir, winslash = "/", mustWork = FALSE)
# Create output directory if it doesn't exist
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Create full file path
file_path <- here::here(output_dir, file_name)
# Create full file path using file.path (more robust than here::here for absolute paths)
file_path <- file.path(output_dir, file_name)
# Save the data
saveRDS(data, file_path)

View file

@ -1,71 +1,142 @@
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\interpolate_growth_model.R
# ============================================================================
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
# ============================================================================
# PURPOSE:
# Interpolate Canopy Index (CI) values across time to create continuous
# growth curves. Fills gaps in measurement dates, applies smoothing via
# LOESS, and generates daily CI estimates and cumulative statistics for
# each field. Enables downstream yield prediction and trend analysis.
#
# INTERPOLATE_GROWTH_MODEL.R
# =========================
# This script interpolates CI (Chlorophyll Index) values between measurement dates
# to create a continuous growth model. It generates daily values and cumulative
# CI statistics for each field.
# INPUT DATA:
# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
# - Format: RDS (wide format: fields × dates with CI values)
# - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
#
# Usage: Rscript interpolate_growth_model.R [project_dir]
# - project_dir: Project directory name (e.g., "chemba")
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/interpolated_ci/
# - Format: RDS files per field (daily CI estimates)
# - Also exports: Growth model curves as RDS (cumulative CI, daily values)
#
# USAGE:
# Rscript 30_interpolate_growth_model.R [project]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - core growth monitoring
# - agronomic_support (AURA): Yes - field health trend analysis
#
# DEPENDENCIES:
# - Packages: tidyverse, lubridate
# - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: combined_CI_data.rds from Script 20
# - Data directories: interpolated_ci/ (created if missing)
#
# NOTES:
# - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
# - Gap-filling: Assumes continuous growth between measurements; skips clouds
# - Cumulative CI: Sum of daily interpolated values from planting to current date
# - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
# - Critical for 8-week CV trend calculation and 4-week growth categorization
#
# RELATED ISSUES:
# SC-112: Utilities restructuring
# SC-108: Core pipeline improvements
#
# ============================================================================
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
library(tidyverse)
library(lubridate)
library(here)
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Data manipulation
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
# Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
library(future) # For setting up parallel execution plans
library(furrr) # For future_map_dfr (parallel file I/O and field processing)
library(parallel) # For detectCores (automatic CPU detection)
library(data.table) # For fast filtering on large datasets
})
# 2. Main function to handle interpolation
# -------------------------------------
main <- function() {
# Process command line arguments
args <- commandArgs(trailingOnly = TRUE)
# =============================================================================
# MAIN PROCESSING FUNCTION
# =============================================================================
# Get project directory from arguments or use default
if (length(args) >= 1 && !is.na(args[1])) {
project_dir <- as.character(args[1])
} else if (exists("project_dir", envir = .GlobalEnv)) {
project_dir <- get("project_dir", envir = .GlobalEnv)
} else {
project_dir <- "esa"
message("No project_dir provided. Using default:", project_dir)
main <- function() {
# STEP 1: Set working directory to project root (smartcane/)
# This ensures all relative paths resolve correctly
if (basename(getwd()) == "r_app") {
setwd("..")
}
# Make project_dir available globally so parameters_project.R can use it
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
# Parse command-line arguments FIRST
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
# Make project_dir available globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
ci_extraction_script <- TRUE
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
# Initialize project configuration and load utility functions
# Load parameters_project.R (provides setup_project_directories, etc.)
tryCatch({
source("parameters_project.R")
source("30_growth_model_utils.R")
source("r_app/parameters_project.R")
}, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "30_growth_model_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.")
})
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
stop(e)
})
log_message("Starting CI growth model interpolation")
# Load growth model utilities
tryCatch({
source("r_app/30_growth_model_utils.R")
}, error = function(e) {
cat(sprintf("Error loading 30_growth_model_utils.R: %s\n", e$message))
stop(e)
})
# STEP 3: Now all utilities are loaded, proceed with script logic
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
safe_log(sprintf("Project: %s", project_dir))
# Set up directory paths from parameters
setup <- setup_project_directories(project_dir)
# For per-field architecture: read from daily_ci_vals_dir (Script 20 per-field output)
daily_vals_dir <- setup$daily_ci_vals_dir
safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))
safe_log("Starting CI growth model interpolation")
# Set up data directory paths
data_dir <- setup$data_dir
# Load and process the data
tryCatch({
# Load the combined CI data
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
# Load the combined CI data (created by Script 20 per-field)
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
# OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files)
harvesting_data <- tryCatch({
load_harvesting_data(data_dir)
}, error = function(e) {
safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING")
NULL
})
# Load CI data with date range pre-filtering
CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)
# Validate harvesting data
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
safe_log("No harvesting data available", "ERROR")
stop("No harvesting data available")
}
@ -75,7 +146,7 @@ main <- function() {
distinct(year) %>%
pull(year)
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
# Generate interpolated CI data for each year and field
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
@ -89,20 +160,20 @@ main <- function() {
# Add daily and cumulative metrics
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
# Save the processed data
# Save the processed data to cumulative_vals directory
save_growth_model(
CI_all_with_metrics,
cumulative_CI_vals_dir,
setup$cumulative_ci_vals_dir,
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
} else {
log_message("No CI data was generated after interpolation", level = "WARNING")
safe_log("No CI data was generated after interpolation", "WARNING")
}
log_message("Growth model interpolation completed successfully")
safe_log("Growth model interpolation completed successfully")
}, error = function(e) {
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
stop(e$message)
})
}

View file

@ -1,215 +0,0 @@
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\mosaic_creation.R
#
# MOSAIC_CREATION.R
# ===============
# This script creates weekly mosaics from daily satellite imagery.
# It handles command-line arguments and initiates the mosaic creation process.
#
# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size]
# - end_date: End date for processing (YYYY-MM-DD format)
# - offset: Number of days to look back from end_date
# - project_dir: Project directory name (e.g., "chemba")
# - file_name: Optional custom output file name
# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE)
# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE)
#
# Examples:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata
#
# 1. Load required packages
# -----------------------
# Load required packages quietly (suppress startup banners so script logs stay clean)
suppressPackageStartupMessages({
library(sf)        # Simple Features: vector geometries (field boundary handling)
library(terra)     # Raster data I/O and processing (mosaic creation)
library(tidyverse) # dplyr/purrr/etc. for data wrangling
library(lubridate) # Date/time helpers (week/year extraction)
library(here)      # Project-root-relative, platform-independent paths
})
# 2. Process command line arguments and run mosaic creation
# ------------------------------------------------------
# Entry point for weekly mosaic creation.
#
# Parses command-line arguments (end_date, offset, project_dir, optional output
# file name), detects the data source and mosaic mode, then creates either
# per-tile weekly mosaics (tile-based projects) or a single weekly mosaic file
# (legacy projects).
#
# Side effects:
#   - Assigns `project_dir` and `data_source` into .GlobalEnv (consumed by the
#     sourced parameters_project.R).
#   - Sources parameters_project.R and 40_mosaic_creation_utils.R, which are
#     expected to define safe_log, date_list, detect_mosaic_mode,
#     create_weekly_mosaic_from_tiles / create_weekly_mosaic, and the globals
#     field_boundaries, daily_vrt, merged_final, use_tile_mosaic.
#     NOTE(review): those names are assumed to come from the sourced files —
#     confirm against parameters_project.R / 40_mosaic_creation_utils.R.
#   - Writes mosaic TIFFs under laravel_app/storage/app/{project_dir}/.
main <- function() {
  # Capture command line arguments:
  # args[1] = end_date (YYYY-MM-DD), args[2] = offset days,
  # args[3] = project_dir, args[4] = optional output file name
  args <- commandArgs(trailingOnly = TRUE)

  # --- project_dir: CLI arg > pre-existing global > default "angata" ---
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    # Default project directory
    project_dir <- "angata"
    # NOTE(review): message() concatenates without a separator, so this prints
    # "...default:angata" (missing space) — cosmetic only.
    message("No project_dir provided. Using default:", project_dir)
  }
  # Make project_dir available globally so parameters_project.R can use it
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # --- end_date: CLI arg > global end_date_str > today ---
  if (length(args) >= 1 && !is.na(args[1])) {
    end_date <- as.Date(args[1])
    if (is.na(end_date)) {
      # Unparseable date string: fall back to today rather than aborting
      message("Invalid end_date provided. Using current date.")
      end_date <- Sys.Date()
    }
  } else if (exists("end_date_str", envir = .GlobalEnv)) {
    end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
  } else {
    # Default to current date if no argument is provided
    end_date <- Sys.Date()
    message("No end_date provided. Using current date: ", format(end_date))
  }

  # --- offset (look-back window in days): CLI arg > default 7 ---
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      # Non-numeric or non-positive offset: fall back to one week
      message("Invalid offset provided. Using default (7 days).")
      offset <- 7
    }
  } else {
    # Default to 7 days if no argument is provided
    offset <- 7
    message("No offset provided. Using default:", offset, "days")
  }

  # --- Detect which data source directory exists (merged_tif or merged_tif_8b) ---
  # Preference order: 8-band optimized > legacy 4-band > 8-band default.
  laravel_storage <- here::here("laravel_app/storage/app", project_dir)
  data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) {
    message("Detected data source: merged_tif_8b (8-band optimized)")
    "merged_tif_8b"
  } else if (dir.exists(file.path(laravel_storage, "merged_tif"))) {
    message("Detected data source: merged_tif (legacy 4-band)")
    "merged_tif"
  } else {
    message("Warning: No data source found. Using default: merged_tif_8b")
    "merged_tif_8b"
  }
  # Set global data_source for parameters_project.R
  assign("data_source", data_source, envir = .GlobalEnv)

  # --- Source project configuration and mosaic utilities ---
  # First try paths relative to the current working directory (works when CWD
  # is r_app/); on failure, retry with project-root-relative paths.
  tryCatch({
    source("parameters_project.R")
    source("40_mosaic_creation_utils.R")
    safe_log(paste("Successfully sourced files from default directory."))
  }, error = function(e) {
    message("Note: Could not open files from default directory (expected on some systems)")
    message("Attempting to source from 'r_app' directory instead...")
    tryCatch({
      source(here::here("r_app", "parameters_project.R"))
      source(here::here("r_app", "40_mosaic_creation_utils.R"))
      message("✓ Successfully sourced files from 'r_app' directory")
    }, error = function(e) {
      stop("Failed to source required files from both default and 'r_app' directories.")
    })
  })

  # --- Generate date range for processing (ISO week/year via date_list) ---
  dates <- date_list(end_date, offset)
  safe_log(paste("Processing data for week", dates$week, "of", dates$year))

  # Output filename: explicit CLI arg, else week_WW_YYYY.tif
  file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
    as.character(args[4])
  } else {
    paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
  }
  safe_log(paste("Output will be saved as:", file_name_tif))

  # --- Route based on project tile detection ---
  # use_tile_mosaic is normally auto-set by parameters_project.R based on
  # whether tiles exist in merged_final_tif/; this is a fallback detection.
  if (!exists("use_tile_mosaic")) {
    merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
    tile_detection <- detect_mosaic_mode(merged_final_dir)
    use_tile_mosaic <- tile_detection$has_tiles
  }

  if (use_tile_mosaic) {
    # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics.
    # Used for projects with large ROIs requiring spatial partitioning.
    # Input: merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles)
    tryCatch({
      safe_log("Starting per-tile mosaic creation (tile-based approach)...")
      # Detect grid size from merged_final_tif folder structure
      # Expected subfolders: 5x5/, 10x10/, 20x20/, ...
      merged_final_base <- file.path(laravel_storage, "merged_final_tif")
      grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE)
      grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
      if (length(grid_patterns) == 0) {
        stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)")
      }
      grid_size <- grid_patterns[1] # Use first grid size found
      safe_log(paste("Detected grid size:", grid_size))
      # Point to the grid-specific merged_final_tif directory
      merged_final_with_grid <- file.path(merged_final_base, grid_size)
      # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif
      tile_output_base <- file.path(laravel_storage, "weekly_tile_max", grid_size)
      dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE)
      created_tile_files <- create_weekly_mosaic_from_tiles(
        dates = dates,
        merged_final_dir = merged_final_with_grid,
        tile_output_dir = tile_output_base,
        field_boundaries = field_boundaries
      )
      safe_log(paste("✓ Per-tile mosaic creation completed - created",
                     length(created_tile_files), "tile files"))
    }, error = function(e) {
      safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
      traceback()
      stop("Mosaic creation failed")
    })
  } else {
    # SINGLE-FILE APPROACH: Create one weekly mosaic file.
    # Used for legacy projects expecting single-file output.
    tryCatch({
      safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
      single_file_output_dir <- file.path(laravel_storage, "weekly_mosaic")
      created_file <- create_weekly_mosaic(
        dates = dates,
        field_boundaries = field_boundaries,
        daily_vrt_dir = daily_vrt,
        merged_final_dir = merged_final,
        output_dir = single_file_output_dir,
        file_name_tif = file_name_tif,
        create_plots = FALSE
      )
      safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
    }, error = function(e) {
      safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
      traceback()
      stop("Mosaic creation failed")
    })
  }
}
# Run main() only when this file is executed as a standalone script
# (sys.nframe() is 0 at top level; it is > 0 when sourced from another frame).
if (sys.nframe() == 0L) main()

View file

@ -0,0 +1,209 @@
# ============================================================================
# SCRIPT 40: Weekly Mosaic Creation (Per-Field CI Aggregation)
# ============================================================================
# PURPOSE:
# Aggregate daily per-field CI TIFFs into weekly mosaics. Handles multi-date
# merging, cloud masking, and produces 5-band weekly output for reporting
# and KPI calculations. Supports both per-field and grid-based tile architecture.
#
# INPUT DATA:
# - Source: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32)
# - Dates: All available dates within week range
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI)
# - Naming: week_WW_YYYY.tif (WW = ISO week, YYYY = ISO year)
#
# USAGE:
# Rscript 40_mosaic_creation_per_field.R [end_date] [offset] [project]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-01-12 7 angata
#
# PARAMETERS:
# - end_date: End date for processing (character, YYYY-MM-DD format, default today)
# - offset: Days to look back (numeric, default 7 for one week)
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - weekly monitoring
# - agronomic_support (AURA): Yes - field health reporting
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate
# - Utils files: parameters_project.R, 40_mosaic_creation_per_field_utils.R
# - Input data: Daily per-field CI TIFFs from Script 20
# - Data directories: field_tiles_CI/, weekly_mosaic/
#
# NOTES:
# - Aggregation method: Maximum CI value per pixel across week (handles clouds)
# - ISO week-year used for consistent date handling across year boundaries
# - Supports both single-file and tiled mosaic architectures
# - Output feeds Scripts 80 (KPI calculations) and 90/91 (reporting)
# - Critical for trend analysis: week-over-week CI comparison
#
# RELATED ISSUES:
# SC-112: Script 40 cleanup (deleted legacy mosaic utils files)
# SC-108: Core pipeline improvements
#
# ============================================================================
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(here)
})
# 2. Main execution function
# -------------------------
# Entry point for Script 40: parses CLI args, loads project configuration and
# utilities, validates the per-field CI input directory, and builds one weekly
# MAX-composite mosaic per field for the requested ISO week.
#
# CLI argument order: [1] end_date (YYYY-MM-DD), [2] offset (days back), [3] project.
# Returns (invisibly) the character vector of created mosaic file paths.
main <- function() {
  cat("\n")
  cat("========================================================\n")
  cat(" Script 40: Per-Field Weekly Mosaic Creation\n")
  cat("========================================================\n\n")
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)
  # ==== Process Arguments ====
  # Project directory: arg 3, else a pre-existing global, else the default.
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "angata"
    message("[INFO] No project_dir provided. Using default: angata")
  }
  # Published to the global environment because the sourced parameter/utility
  # scripts read `project_dir` from there.
  assign("project_dir", project_dir, envir = .GlobalEnv)
  message(paste("[INFO] Project:", project_dir))
  # End date: arg 1; falls back to today when absent or unparseable.
  if (length(args) >= 1 && !is.na(args[1])) {
    end_date <- as.Date(args[1], format = "%Y-%m-%d")
    if (is.na(end_date)) {
      message("[WARNING] Invalid end_date. Using current date.")
      end_date <- Sys.Date()
    }
  } else {
    end_date <- Sys.Date()
    message(paste("[INFO] No end_date provided. Using current date:", format(end_date)))
  }
  # Offset (days back): arg 2; must be a positive number, default 7 (one week).
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      message("[WARNING] Invalid offset. Using default: 7 days")
      offset <- 7
    }
  } else {
    offset <- 7
    message("[INFO] No offset provided. Using default: 7 days")
  }
  # ==== Load Configuration ====
  # Normalize the working directory so relative "r_app/..." paths resolve
  # whether the script is launched from the repo root or from r_app/.
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }
  tryCatch({
    source("r_app/parameters_project.R")
    message("[INFO] ✓ Loaded parameters_project.R")
  }, error = function(e) {
    stop("[ERROR] Failed to load parameters_project.R: ", e$message)
  })
  tryCatch({
    source("r_app/40_mosaic_creation_per_field_utils.R")
    message("[INFO] ✓ Loaded 40_mosaic_creation_per_field_utils.R")
  }, error = function(e) {
    stop("[ERROR] Failed to load utilities: ", e$message)
  })
  # ==== Get Project Directories ====
  # setup_project_directories() comes from parameters_project.R (sourced above).
  setup <- setup_project_directories(project_dir)
  # Determine input/output directories
  # Input: field_tiles_CI/ (from Script 20)
  field_tiles_ci_dir <- setup$field_tiles_ci_dir
  # Output: weekly_mosaic/ (for Scripts 90/91)
  weekly_mosaic_output_dir <- file.path(setup$laravel_storage_dir, "weekly_mosaic")
  message(paste("[INFO] Input directory:", field_tiles_ci_dir))
  message(paste("[INFO] Output directory:", weekly_mosaic_output_dir))
  # ==== Validate Input Directory ====
  if (!dir.exists(field_tiles_ci_dir)) {
    stop(paste("[ERROR] Input directory not found:", field_tiles_ci_dir,
               "\nScript 20 (CI extraction) must be run first to create per-field TIFFs."))
  }
  # Check if directory has any per-field subdirectories at all.
  field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
  if (length(field_dirs) == 0) {
    stop(paste("[ERROR] No field subdirectories found in:", field_tiles_ci_dir))
  }
  message(paste("[INFO] Found", length(field_dirs), "field directories"))
  # ==== Generate Date Range ====
  # date_list() (from the utils file) derives ISO week/year and the list of
  # daily date strings used to select input TIFFs.
  dates <- date_list(end_date, offset)
  # Validate week calculation
  message(sprintf("[INFO] Requested offset: %d days", offset))
  message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d")))
  message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d")))
  message(sprintf("[INFO] Calculating ISO week: %d", dates$week))
  message(sprintf("[INFO] Calculating ISO year: %d", dates$year))
  # ==== Create Per-Field Weekly Mosaics ====
  created_files <- create_all_field_weekly_mosaics(
    dates = dates,
    field_tiles_ci_dir = field_tiles_ci_dir,
    output_dir = weekly_mosaic_output_dir
  )
  # ==== Summary ====
  message("\n")
  message("========================================================")
  message(paste(" COMPLETED"))
  message(paste(" Created:", length(created_files), "weekly field mosaics"))
  message("========================================================\n")
  if (length(created_files) > 0) {
    message("[SUCCESS] Weekly mosaics ready for reporting (Scripts 90/91)")
  } else {
    message("[WARNING] No mosaics created - check input data")
  }
  return(invisible(created_files))
}
# Execute main if script is run directly
# When executed as a script, run main() and translate the outcome into a
# process exit status: 0 on success, 1 on any error.
if (sys.nframe() == 0) {
  exit_status <- tryCatch({
    created <- main()
    0L
  }, error = function(e) {
    message(paste("\n[FATAL ERROR]", e$message))
    1L
  })
  quit(save = "no", status = exit_status)
}

View file

@ -0,0 +1,268 @@
# MOSAIC_CREATION_PER_FIELD_UTILS.R
# ==================================
# Utility functions for creating per-field weekly mosaics from per-field daily TIFFs.
#
# This module aggregates daily per-field 5-band TIFFs (R,G,B,NIR,CI from Script 20)
# into weekly per-field mosaics using MAX compositing.
#
# DATA FLOW:
# Script 20: field_tiles_CI/{FIELD}/{DATE}.tif (5-band daily, per-field)
# ↓
# Script 40 NEW (this module):
# For each field:
# For each week:
# - Find all daily TIFFs for that week
# - Stack & create MAX composite
# - Save: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
# ↓
# Scripts 90/91: Read weekly_mosaic/{FIELD}/week_WW_YYYY.tif (unchanged interface)
#' Build ISO-week date metadata for a processing window
#'
#' @param end_date Window end (Date object or "YYYY-MM-DD" string)
#' @param offset Number of days covered, counting back from (and including) end_date
#' @return List with week, year, start_date, end_date, days_filter
#'   (character vector of "YYYY-MM-DD" strings)
#'
date_list <- function(end_date, offset) {
  # Coerce string input to Date, rejecting anything unparseable.
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date. Expected Date object or YYYY-MM-DD string.")
    }
  }
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset. Expected positive number.")
  }
  # The window is inclusive of end_date, so only step back offset - 1 days.
  start_date <- end_date - lubridate::days(offset - 1)
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)
  # Warn (but proceed with the END date's week) when the window crosses an
  # ISO week boundary.
  start_week <- lubridate::isoweek(start_date)
  start_year <- lubridate::isoyear(start_date)
  if (start_week != week || start_year != year) {
    safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. Using END date week %d/%d.",
                     start_week, start_year, week, year, week, year), "WARNING")
  }
  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")
  safe_log(paste("Date range:", start_date, "to", end_date,
                 "(week", week, "of", year, ")"))
  list(
    week = week,
    year = year,
    start_date = start_date,
    end_date = end_date,
    days_filter = days_filter
  )
}
#' Find all per-field daily TIFFs for a specific week
#'
#' @param field_tiles_ci_dir Base directory containing per-field TIFFs
#'   (e.g., field_tiles_CI/)
#' @param days_filter Vector of YYYY-MM-DD dates to match
#' @return Named list: field name -> sorted vector of matching TIFF paths
#'
find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) {
  if (!dir.exists(field_tiles_ci_dir)) {
    safe_log(paste("Field TIFFs directory not found:", field_tiles_ci_dir), "WARNING")
    return(list())
  }
  # Each field has its own subdirectory of daily TIFFs.
  field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
  if (length(field_dirs) == 0) {
    safe_log("No field subdirectories found in field_tiles_CI/", "WARNING")
    return(list())
  }
  # One alternation pattern covering every date of the week.
  date_pattern <- paste(days_filter, collapse = "|")
  field_tiffs <- list()
  for (field in field_dirs) {
    all_tifs <- list.files(file.path(field_tiles_ci_dir, field),
                           pattern = "\\.tif$", full.names = TRUE)
    in_week <- all_tifs[grepl(date_pattern, all_tifs)]
    # Fields with no data for this week are simply omitted from the result.
    if (length(in_week) > 0) {
      field_tiffs[[field]] <- sort(in_week)
    }
  }
  safe_log(paste("Found TIFFs for", length(field_tiffs), "fields in week"))
  field_tiffs
}
#' Create weekly MAX composite for a single field
#'
#' Loads every readable daily TIFF for a field+week and reduces them to a
#' per-pixel, per-band maximum composite.
#'
#' @param tiff_files Vector of TIFF file paths for this field+week
#' @param field_name Name of the field (for logging)
#' @return SpatRaster with 5 bands (R,G,B,NIR,CI), or NULL if fails
#'
create_field_weekly_composite <- function(tiff_files, field_name) {
  if (length(tiff_files) == 0) {
    return(NULL)
  }
  tryCatch({
    # Load each daily TIFF; unreadable files are skipped quietly.
    rasters <- list()
    for (path in tiff_files) {
      loaded <- tryCatch(terra::rast(path), error = function(e) NULL)
      if (!is.null(loaded)) {
        rasters[[length(rasters) + 1]] <- loaded
      }
    }
    if (length(rasters) == 0) {
      return(NULL)
    }
    # Single day: nothing to composite. Otherwise take the per-pixel MAX.
    composite <- if (length(rasters) == 1) {
      rasters[[1]]
    } else {
      terra::mosaic(terra::sprc(rasters), fun = "max")
    }
    # Normalize to the expected 5-band layout when possible.
    if (terra::nlyr(composite) >= 5) {
      composite <- terra::subset(composite, 1:5)
      names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
    }
    composite
  }, error = function(e) {
    safe_log(paste("Error creating composite for field", field_name, ":", e$message), "ERROR")
    NULL
  })
}
#' Save per-field weekly mosaic
#'
#' @param raster SpatRaster to save
#' @param output_dir Base output directory (e.g., laravel_app/storage/app/{project}/weekly_mosaic/)
#' @param field_name Name of the field
#' @param week Week number (ISO week)
#' @param year Year (ISO year)
#' @return File path of saved TIFF, or NULL if fails
#'
save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year) {
  if (is.null(raster)) {
    return(NULL)
  }
  tryCatch({
    # Each field gets its own subdirectory under the weekly_mosaic root.
    target_dir <- file.path(output_dir, field_name)
    dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)
    # Naming convention: week_WW_YYYY.tif (zero-padded ISO week and year).
    target_path <- file.path(target_dir, sprintf("week_%02d_%04d.tif", week, year))
    terra::writeRaster(raster, target_path, overwrite = TRUE)
    target_path
  }, error = function(e) {
    safe_log(paste("Error saving mosaic for field", field_name, ":", e$message), "ERROR")
    NULL
  })
}
#' Create all weekly mosaics for all fields in a week
#'
#' Main orchestration function: finds the week's per-field TIFFs, builds a MAX
#' composite per field, and writes each to the weekly_mosaic/ tree.
#'
#' @param dates List from date_list() - contains week, year, days_filter
#' @param field_tiles_ci_dir Input: field_tiles_CI/ directory
#' @param output_dir Output: weekly_mosaic/ directory
#' @return Vector of successfully created file paths
#'
create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_dir) {
  safe_log(paste("=== Creating Per-Field Weekly Mosaics ==="))
  safe_log(paste("Week:", dates$week, "Year:", dates$year))
  field_tiffs <- find_per_field_tiffs_for_week(field_tiles_ci_dir, dates$days_filter)
  if (length(field_tiffs) == 0) {
    safe_log("No per-field TIFFs found for this week - returning empty list", "WARNING")
    return(character())
  }
  safe_log(paste("Processing", length(field_tiffs), "fields..."))
  created_files <- character()
  # Console progress bar across all fields.
  pb <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50)
  for (idx in seq_along(field_tiffs)) {
    field_name <- names(field_tiffs)[idx]
    composite <- create_field_weekly_composite(field_tiffs[[field_name]], field_name)
    if (!is.null(composite)) {
      saved_path <- save_field_weekly_mosaic(composite, output_dir, field_name,
                                             dates$week, dates$year)
      if (!is.null(saved_path)) {
        created_files <- c(created_files, saved_path)
      }
    }
    setTxtProgressBar(pb, idx)
  }
  close(pb)
  cat("\n") # New line after progress bar
  safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))
  created_files
}

View file

@ -1,286 +0,0 @@
# MOSAIC_CREATION_TILE_UTILS.R
# ============================
# Tile-based processing utilities for scalable weekly mosaic creation
#
# STRATEGY: Memory-efficient, scalable approach for large study areas
# Split daily full-extent mosaics into fixed-size tiles (e.g., 5×5 km),
# then process each tile position across all days, and reassemble.
#
# WORKFLOW:
# Daily full-extent TIFF
# ↓
# Split into N×M tiles (based on area size / tile_size)
# E.g., 50×80 km area with 5 km tiles = 10×16 = 160 tiles
# ↓
# For EACH TILE POSITION (1 to 160):
# - Load that tile from all 7 days
# - Create MAX composite for that tile
# - Save tile_NNN_MAX.tif
# ↓
# Reassemble all 160 MAX tiles back into full-extent
# ↓
# Save weekly full-extent mosaic
#
# KEY BENEFIT: Memory usage ~50 MB per tile (5×5 km), not 1.3 GB (full 50×80 km)
# Scales automatically: bigger area = more tiles, same memory per tile
#
# TILE_SIZE: Configurable (default 5000 m = 5 km, adjustable via parameter)
#' Split a raster into fixed-size tiles via terra::makeTiles()
#'
#' The grid is derived from the desired tile SIZE (not a fixed grid count),
#' so a larger area automatically yields more tiles.
#'
#' @param raster_path Path to raster to split
#' @param output_dir Directory to save tiles
#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km)
#' @return List with tiles (file paths) and grid metadata
#'
split_raster_into_tiles <- function(raster_path, output_dir, tile_size_m = 5000) {
  r <- terra::rast(raster_path)
  e <- terra::ext(r)
  # Grid dimensions follow from the extent divided by the tile size.
  n_tiles_x <- ceiling((e$xmax - e$xmin) / tile_size_m)
  n_tiles_y <- ceiling((e$ymax - e$ymin) / tile_size_m)
  safe_log(paste("Splitting into", n_tiles_x, "×", n_tiles_y, "tiles of",
                 tile_size_m / 1000, "km"))
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  # makeTiles() expects n = c(rows, cols) and numbers the output files itself.
  tiles <- terra::makeTiles(
    r,
    n = c(n_tiles_y, n_tiles_x),
    filename = file.path(output_dir, "tile_.tif"),
    overwrite = TRUE
  )
  safe_log(paste("Created", length(tiles), "tile files"))
  list(
    tiles = tiles,
    n_tiles = length(tiles),
    n_tiles_x = n_tiles_x,
    n_tiles_y = n_tiles_y,
    extent = e,
    raster = raster_path
  )
}
#' Create weekly MAX mosaic using TRUE tile-based processing
#'
#' TILE-BASED WORKFLOW (memory efficient):
#'   1. Calculate tile grid dynamically from extent and FIXED tile_size (e.g., 5 km)
#'   2. For EACH TILE across all days: load just that window from each daily file
#'      and reduce it to a per-pixel MAX composite (small crops, not full rasters)
#'   3. Reassemble all tile composites into the final full-extent mosaic
#'
#' SCALABILITY: a fixed tile size means a bigger area automatically gets more
#' tiles while per-tile memory stays constant.
#'
#' @param dates List from date_list() with days_filter
#' @param merged_final_dir Directory with daily merged full-extent TIFFs
#' @param final_output_dir Directory to store final reassembled mosaic
#' @param file_name_tif Output filename for final mosaic
#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km)
#' @param tile_storage_dir Optional scratch directory for intermediate tile files.
#'   Defaults to a per-process temp directory. (Previously this was hard-coded to
#'   the "angata" project storage path, which was wrong for every other project.)
#' @return Path to final reassembled weekly mosaic, or NULL on failure
#'
create_weekly_mosaic_tiled <- function(dates, merged_final_dir,
                                       final_output_dir, file_name_tif,
                                       tile_size_m = 5000,
                                       tile_storage_dir = NULL) {
  # Get daily files for this week
  daily_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")
  daily_files <- daily_files[grepl(paste(dates$days_filter, collapse = "|"), daily_files)]
  if (length(daily_files) == 0) {
    safe_log("No daily files found for this week", "ERROR")
    return(NULL)
  }
  safe_log(paste("Found", length(daily_files), "daily files for week", dates$week))
  # Step 1: establish the tile structure from the first daily file
  safe_log("Step 1: Loading first daily file to establish tile structure")
  first_raster <- terra::rast(daily_files[1])
  raster_crs <- terra::crs(first_raster)
  # Geographic CRS uses degrees, so the metric tile size must be converted.
  is_geographic <- grepl("GEOGCRS|longlat|geographic|ANGLEUNIT.*degree",
                         as.character(raster_crs), ignore.case = TRUE)
  if (is_geographic) {
    # Approximation: 1 degree of latitude ≈ 111 km (adequate for tiling).
    tile_size_units <- tile_size_m / 111000
    safe_log(paste("Raster is in geographic CRS (lat/lon). Converting", tile_size_m / 1000,
                   "km to", round(tile_size_units, 4), "degrees"))
  } else {
    tile_size_units <- tile_size_m
    safe_log(paste("Raster is in projected CRS. Using", tile_size_m / 1000, "km directly"))
  }
  # Step 2: compute tile grid dimensions (extent queried once, then reused)
  extent <- terra::ext(first_raster)
  x_range <- extent$xmax - extent$xmin
  y_range <- extent$ymax - extent$ymin
  n_tiles_x <- ceiling(x_range / tile_size_units)
  n_tiles_y <- ceiling(y_range / tile_size_units)
  n_tiles_total <- n_tiles_x * n_tiles_y
  safe_log(paste("Step 2: Creating tile grid:", tile_size_m / 1000, "km tiles"))
  safe_log(paste("  Extent:", round(x_range, 4), "° ×", round(y_range, 4), "°"))
  safe_log(paste("  Grid:", n_tiles_y, "rows ×", n_tiles_x, "columns =", n_tiles_total, "tiles"))
  # Calculate tile extents mathematically (no temp files needed for the grid)
  x_min <- extent$xmin
  y_min <- extent$ymin
  tile_extents <- list()
  tile_idx <- 0
  for (row in 1:n_tiles_y) {
    for (col in 1:n_tiles_x) {
      tile_idx <- tile_idx + 1
      # Edge tiles are clamped to the raster extent.
      tile_xmin <- x_min + (col - 1) * tile_size_units
      tile_xmax <- min(tile_xmin + tile_size_units, extent$xmax)
      tile_ymin <- y_min + (row - 1) * tile_size_units
      tile_ymax <- min(tile_ymin + tile_size_units, extent$ymax)
      tile_extents[[tile_idx]] <- terra::ext(tile_xmin, tile_xmax, tile_ymin, tile_ymax)
    }
  }
  safe_log(paste("Calculated", length(tile_extents), "tile extents"))
  # Scratch directory for intermediate tiles.
  # FIX: previously hard-coded to laravel_app/storage/app/angata/daily_tiles,
  # which silently wrote into the wrong project for everyone except "angata".
  if (is.null(tile_storage_dir)) {
    tile_storage_dir <- file.path(tempdir(), paste0("daily_tiles_", Sys.getpid()))
  }
  dir.create(tile_storage_dir, recursive = TRUE, showWarnings = FALSE)
  # Step 3: build per-tile MAX composites across all daily files
  safe_log("Step 3: Processing tiles (load per-tile across all days, create MAX for each)")
  tile_files_list <- character()
  for (tile_idx in seq_along(tile_extents)) {
    # Log roughly every 10% of progress (and the first tile).
    if (tile_idx %% max(1, ceiling(n_tiles_total / 10)) == 0 || tile_idx == 1) {
      safe_log(paste("  Processing tile", tile_idx, "of", n_tiles_total))
    }
    tile_extent <- tile_extents[[tile_idx]]
    # FIX: append (rather than assign by file index) so a failed crop cannot
    # leave NULL holes in the list, which would break terra::sprc() below.
    daily_tile_data <- list()
    for (file_idx in seq_along(daily_files)) {
      tryCatch({
        r <- terra::rast(daily_files[file_idx])
        daily_tile_data[[length(daily_tile_data) + 1]] <- terra::crop(r, tile_extent, snap = "in")
      }, error = function(e) {
        safe_log(paste("Warning: Could not crop tile", tile_idx, "from",
                       basename(daily_files[file_idx])), "WARNING")
      })
    }
    if (length(daily_tile_data) == 0) {
      safe_log(paste("No valid data for tile", tile_idx), "WARNING")
      next
    }
    # Per-pixel, per-band maximum across the week for this tile.
    if (length(daily_tile_data) == 1) {
      tile_max <- daily_tile_data[[1]]
    } else {
      collection <- terra::sprc(daily_tile_data)
      tile_max <- terra::mosaic(collection, fun = "max")
    }
    # Persist the tile to disk to keep memory flat.
    tile_file <- file.path(tile_storage_dir, sprintf("tile_%04d.tif", tile_idx))
    terra::writeRaster(tile_max, tile_file, overwrite = TRUE)
    # FIX: appended as a flat character vector (skipped tiles used to leave
    # NULL holes that broke the lapply(terra::rast) reassembly below).
    tile_files_list <- c(tile_files_list, tile_file)
  }
  if (length(tile_files_list) == 0) {
    safe_log("No tiles processed successfully", "ERROR")
    return(NULL)
  }
  safe_log(paste("Step 4: Reassembling", length(tile_files_list), "tiles from disk into full-extent mosaic"))
  # Load all tile composites and stitch them back together.
  tile_rasters <- lapply(tile_files_list, terra::rast)
  collection <- terra::sprc(tile_rasters)
  final_mosaic <- terra::mosaic(collection, fun = "first")
  safe_log("Step 5: Saving final reassembled mosaic")
  dir.create(final_output_dir, recursive = TRUE, showWarnings = FALSE)
  final_file_path <- file.path(final_output_dir, file_name_tif)
  # FIX: the old error handler's return(NULL) only exited the handler, so a
  # failed save still returned the (nonexistent) file path. Track success
  # explicitly and return NULL after cleanup when the write failed.
  save_ok <- TRUE
  tryCatch({
    terra::writeRaster(final_mosaic, final_file_path, overwrite = TRUE)
    safe_log(paste("✓ Weekly mosaic saved to:", final_file_path))
  }, error = function(e) {
    safe_log(paste("Error saving mosaic:", e$message), "ERROR")
    save_ok <<- FALSE
  })
  # Cleanup temporary tile files
  unlink(tile_storage_dir, recursive = TRUE)
  safe_log("Cleaned up temporary tile files")
  if (!save_ok) {
    return(NULL)
  }
  return(final_file_path)
}
#' Load tile MAX rasters for a specific week (for per-tile analysis)
#'
#' @param week Week number
#' @param tile_dir Directory containing tile mosaics (week subdirectories)
#' @return Named list of tile SpatRasters ordered by tile id, or NULL when
#'   the week directory or its tile files are missing
#'
load_tile_mosaics <- function(week, tile_dir) {
  week_dir <- file.path(tile_dir, paste0("week_", week))
  if (!dir.exists(week_dir)) {
    warning(paste("Tile directory not found:", week_dir))
    return(NULL)
  }
  # Load all tile files
  tile_files <- list.files(week_dir, pattern = "^tile_.*\\.tif$", full.names = TRUE)
  if (length(tile_files) == 0) {
    # FIX: warning() concatenates its arguments without a separator, which
    # produced "...found in:/path"; wrap in paste() like the branch above.
    warning(paste("No tile files found in:", week_dir))
    return(NULL)
  }
  # Order files numerically by the tile id embedded in the filename
  # (lexicographic order would misplace e.g. tile_10 before tile_2).
  tile_numbers <- as.numeric(gsub(".*tile_(\\d+).*", "\\1", tile_files))
  tile_files <- tile_files[order(tile_numbers)]
  # Load rasters; names follow the sorted tile ids.
  tiles_list <- lapply(tile_files, terra::rast)
  names(tiles_list) <- sprintf("tile_%03d", sort(tile_numbers))
  safe_log(paste("Loaded", length(tiles_list), "tile mosaics for week", week))
  return(tiles_list)
}

View file

@ -1,827 +0,0 @@
# MOSAIC_CREATION_UTILS.R
# ======================
# Utility functions for creating weekly mosaics from daily satellite imagery.
# These functions support cloud cover assessment, date handling, and mosaic creation.
#' Detect whether a project uses tile-based or single-file mosaic approach
#'
#' @param merged_final_tif_dir Directory containing merged_final_tif files
#' @return List with has_tiles (logical), detected_tiles (vector), total_files (count)
#'
detect_mosaic_mode <- function(merged_final_tif_dir) {
  # Shared result for the "nothing to inspect" cases.
  no_tiles <- list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)
  if (!dir.exists(merged_final_tif_dir)) {
    return(no_tiles)
  }
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(no_tiles)
  }
  # Tile naming convention: a trailing "_NN" (two digits) before ".tif",
  # e.g. 2025-11-27_00.tif or week_50_2024_00.tif.
  tile_files <- grep("_(\\d{2})\\.tif$", tif_files, value = TRUE)
  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files)
  )
}
#' Safe logging function
#'
#' Delegates to the project logger `log_message` when it is loaded; otherwise
#' falls back to R's native condition system (warning/message).
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  if (exists("log_message")) {
    log_message(message, level)
  } else if (level %in% c("ERROR", "WARNING")) {
    # No project logger: escalate serious levels as R warnings.
    warning(message)
  } else {
    message(message)
  }
}
#' Generate a sequence of dates for processing
#'
#' @param end_date The end date for the sequence (Date object or string)
#' @param offset Number of days to look back from end_date (inclusive of end_date)
#' @return A list containing week number, year, and a sequence of dates for filtering
#'
date_list <- function(end_date, offset) {
  # Coerce string input to Date, rejecting unparseable values.
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }
  # Window includes end_date itself, so only step back offset - 1 days.
  start_date <- end_date - lubridate::days(offset - 1)
  # Daily date strings used downstream to filter filenames.
  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")
  safe_log(paste("Date range generated from", start_date, "to", end_date))
  list(
    "week" = lubridate::isoweek(end_date),
    "year" = lubridate::isoyear(end_date),
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  )
}
#' Create a weekly mosaic from available VRT files
#'
#' @param dates List from date_list() with date range info
#' @param field_boundaries Field boundaries for image cropping
#' @param daily_vrt_dir Directory containing VRT files
#' @param merged_final_dir Directory with merged final rasters
#' @param output_dir Output directory for weekly mosaics
#' @param file_name_tif Output filename for the mosaic
#' @param create_plots Whether to create visualization plots (default: FALSE)
#' @return The file path of the saved mosaic
#'
create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir,
                                 merged_final_dir, output_dir, file_name_tif,
                                 create_plots = FALSE) {
  vrt_list <- find_vrt_files(daily_vrt_dir, dates)
  # Final merged rasters serve both as mosaic input and as fallback template.
  raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")
  if (length(vrt_list) == 0) {
    safe_log("No VRT files available for the date range, creating empty mosaic with NA values", "WARNING")
    if (length(raster_files_final) == 0) {
      stop("No VRT files or final raster files available to create mosaic")
    }
    # Build an all-NA placeholder on the grid of the first available raster so
    # downstream scripts still receive a file with the expected band layout.
    mosaic <- terra::rast(raster_files_final[1])
    mosaic <- terra::setValues(mosaic, NA)
    mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE)
    names(mosaic) <- c("Red", "Green", "Blue", "NIR", "CI")
  } else {
    safe_log("VRT list created, assessing cloud cover for mosaic creation")
    # Per-image cloud statistics drive which daily rasters go into the mosaic.
    cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries)
    mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries)
  }
  # Save without mask sidecar files to avoid breaking other scripts.
  file_path <- save_mosaic(mosaic, output_dir, file_name_tif, create_plots, save_mask = FALSE)
  safe_log(paste("Weekly mosaic processing completed for week", dates$week))
  return(file_path)
}
#' Find VRT files within a date range
#'
#' @param vrt_directory Directory containing VRT files
#' @param dates List from date_list() function containing days_filter
#' @return Character vector of VRT file paths (ordered by date)
#'
find_vrt_files <- function(vrt_directory, dates) {
  vrt_files <- list.files(here::here(vrt_directory), full.names = TRUE)
  if (length(vrt_files) == 0) {
    warning("No VRT files found in directory: ", vrt_directory)
    return(character(0))
  }
  # Match files date-by-date so results stay grouped in day order.
  vrt_list <- unlist(
    lapply(dates$days_filter,
           function(day) vrt_files[grepl(pattern = day, x = vrt_files)]),
    use.names = FALSE
  )
  safe_log(paste("Found", length(vrt_list), "VRT files for the date range"))
  return(vrt_list)
}
#' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files
#'
#' For each date extracted from the VRT filenames, loads the corresponding TIF
#' from merged_final_dir, takes its LAST band (CI), and computes the share of
#' missing (cloud-masked) pixels — restricted to field areas when boundaries
#' are supplied. Per-image failures degrade to a 100%-missing row rather than
#' aborting the whole assessment.
#'
#' @param vrt_list List of VRT file paths (used to extract dates for TIF file lookup)
#' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif)
#' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only
#' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection),
#'   or NULL when no inputs / no matching TIFs exist or the assessment fails outright
#'
count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) {
  if (length(vrt_list) == 0) {
    warning("No VRT files provided for cloud coverage calculation")
    return(NULL)
  }
  tryCatch({
    # Extract dates from VRT filenames to find corresponding TIF files
    # VRT filenames are like "merged2025-12-18.vrt", TIF filenames are like "2025-12-18.tif"
    tif_dates <- gsub(".*([0-9]{4}-[0-9]{2}-[0-9]{2}).*", "\\1", basename(vrt_list))
    # Build list of actual TIF files to use
    tif_files <- paste0(here::here(merged_final_dir), "/", tif_dates, ".tif")
    # Check which TIF files exist; silently drop dates without a TIF
    tif_exist <- file.exists(tif_files)
    if (!any(tif_exist)) {
      warning("No TIF files found in directory: ", merged_final_dir)
      return(NULL)
    }
    tif_files <- tif_files[tif_exist]
    safe_log(paste("Found", length(tif_files), "TIF files for cloud coverage assessment"))
    # Initialize list to store aggregated results (one data.frame row per TIF)
    aggregated_results <- list()
    # Process each TIF file
    for (tif_idx in seq_along(tif_files)) {
      tif_file <- tif_files[tif_idx]
      tryCatch({
        # Load the TIF file (typically has 5 bands: R, G, B, NIR, CI)
        current_raster <- terra::rast(tif_file)
        # Extract the CI band (last band) — missing CI pixels mark clouds
        ci_band <- current_raster[[terra::nlyr(current_raster)]]
        # Create a unique field mask for THIS raster's extent
        # This handles cases where rasters have different extents due to missing data
        total_notna <- NA_real_
        total_pixels <- NA_real_
        if (!is.null(field_boundaries)) {
          tryCatch({
            # Create mask specific to this raster's grid
            field_mask <- terra::rasterize(field_boundaries, ci_band, field = 1)
            # Count pixels within field boundaries (for this specific raster)
            total_pixels <- terra::global(field_mask, fun = "notNA")$notNA
            # Cloud coverage calculated only over field areas
            ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA)
            total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA
          }, error = function(e) {
            # If field mask creation fails, fall back to entire raster
            safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING")
          })
        }
        # If field mask failed (or no boundaries given), use entire raster
        if (is.na(total_notna)) {
          total_notna <- terra::global(ci_band, fun = "notNA")$notNA
          total_pixels <- terra::ncell(ci_band)
        }
        # Calculate cloud coverage percentage (missing = clouds)
        missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
        aggregated_results[[tif_idx]] <- data.frame(
          filename = tif_file,
          notNA = total_notna,
          total_pixels = total_pixels,
          missing_pixels_percentage = missing_pct,
          thres_5perc = as.integer(missing_pct < 5),
          # NOTE(review): column name says 40 but the cutoff is 45 — confirm
          # which threshold downstream selection actually intends.
          thres_40perc = as.integer(missing_pct < 45),
          stringsAsFactors = FALSE
        )
      }, error = function(e) {
        safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING")
        # <<- writes into the enclosing loop's list from inside the error
        # handler, recording a worst-case (100% missing) row for this image.
        aggregated_results[[tif_idx]] <<- data.frame(
          filename = tif_file,
          notNA = NA_real_,
          total_pixels = NA_real_,
          missing_pixels_percentage = 100,
          thres_5perc = 0,
          thres_40perc = 0,
          stringsAsFactors = FALSE
        )
      })
    }
    # Combine all aggregated results into one data frame (rows in input order)
    aggregated_df <- if (length(aggregated_results) > 0) {
      do.call(rbind, aggregated_results)
    } else {
      data.frame()
    }
    # Log results
    safe_log(paste("Cloud coverage assessment completed for", length(vrt_list), "images"))
    # Return aggregated data only
    return(aggregated_df)
  }, error = function(e) {
    warning("Error in cloud coverage calculation: ", e$message)
    return(NULL)
  })
}
#' Create a mosaic from merged_final_tif files based on cloud coverage
#'
#' Selection logic: prefer images passing the 5% cloud threshold; otherwise fall
#' back to the "40%" threshold column; otherwise use every available image.
#' When multiple images are selected, a per-pixel MAX composite is built so
#' clear (high-value) pixels win over clouded ones.
#'
#' @param tif_files List of processed TIF files (5 bands: R, G, B, NIR, CI)
#' @param cloud_coverage_stats Cloud coverage statistics from count_cloud_coverage()
#' @param field_boundaries Field boundaries for masking (optional)
#' @return A SpatRaster object with 5 bands (Red, Green, Blue, NIR, CI)
#'
create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NULL) {
  # If no TIF files, return NULL
  if (length(tif_files) == 0) {
    safe_log("No TIF files available for mosaic creation", "ERROR")
    return(NULL)
  }
  # Validate cloud coverage stats
  mosaic_type <- "Unknown" # Track what type of mosaic is being created
  if (is.null(cloud_coverage_stats) || nrow(cloud_coverage_stats) == 0) {
    safe_log("No cloud coverage statistics available, using all files", "WARNING")
    rasters_to_use <- tif_files
    mosaic_type <- paste("all", length(tif_files), "available images")
  } else {
    # Determine best rasters to use based on cloud coverage thresholds
    # Count how many images meet each threshold
    # NOTE(review): thres_40perc is computed upstream with a 45% cutoff even
    # though log messages below say "<40% clouds" — confirm intended threshold.
    num_5perc <- sum(cloud_coverage_stats$thres_5perc, na.rm = TRUE)
    num_40perc <- sum(cloud_coverage_stats$thres_40perc, na.rm = TRUE)
    if (num_5perc > 1) {
      # Multiple images with <5% cloud coverage
      safe_log(paste("Creating max composite from", num_5perc, "cloud-free images (<5% clouds)"))
      mosaic_type <- paste(num_5perc, "cloud-free images (<5% clouds)")
      best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)
    } else if (num_5perc == 1) {
      # Single image with <5% cloud coverage
      safe_log("Using single cloud-free image (<5% clouds)")
      mosaic_type <- "single cloud-free image (<5% clouds)"
      best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)
    } else if (num_40perc > 1) {
      # Multiple images with <40% cloud coverage
      safe_log(paste("Creating max composite from", num_40perc, "partially cloudy images (<40% clouds)"), "WARNING")
      mosaic_type <- paste(num_40perc, "partially cloudy images (<40% clouds)")
      best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)
    } else if (num_40perc == 1) {
      # Single image with <40% cloud coverage
      safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
      mosaic_type <- "single partially cloudy image (<40% clouds)"
      best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)
    } else {
      # No cloud-free images available
      safe_log("No cloud-free images available, using all images", "WARNING")
      mosaic_type <- paste("all", nrow(cloud_coverage_stats), "available images")
      best_coverage <- seq_len(nrow(cloud_coverage_stats))
    }
    # Get filenames of best-coverage images
    # Match by full filename from cloud stats to TIF files
    # NOTE(review): the comparison below is basename(tif_file) == stats$filename,
    # so it only succeeds when the stats store basenames (as
    # count_cloud_coverage_for_tile() does) — verify against the stats producer.
    rasters_to_use <- character()
    for (idx in best_coverage) {
      # Get the full filename from cloud coverage stats
      cc_filename <- cloud_coverage_stats$filename[idx]
      # Find matching TIF file by full filename
      matching_tif <- NULL
      for (tif_file in tif_files) {
        tif_basename <- basename(tif_file)
        if (tif_basename == cc_filename) {
          matching_tif <- tif_file
          break
        }
      }
      if (!is.null(matching_tif)) {
        rasters_to_use <- c(rasters_to_use, matching_tif)
      } else {
        safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", cc_filename), "WARNING")
      }
    }
    if (length(rasters_to_use) == 0) {
      safe_log("Could not match cloud coverage stats to TIF files, using all files", "WARNING")
      rasters_to_use <- tif_files
      mosaic_type <- paste("all", length(tif_files), "available images")
    }
  }
  # Load and mosaic the selected rasters
  if (length(rasters_to_use) == 1) {
    # Single file - just load it
    safe_log(paste("Using single image for mosaic:", basename(rasters_to_use)))
    mosaic <- terra::rast(rasters_to_use[1])
  } else {
    # Multiple files - merge handles different extents/grids automatically
    safe_log(paste("Creating mosaic from", length(rasters_to_use), "images"))
    # Load all rasters with error handling - only keep successful loads
    all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) {
      tryCatch({
        terra::rast(f)
      }, error = function(e) {
        safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING")
        NULL # Return NULL on error, will be filtered out
      })
    }))
    # Check what we loaded
    safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files"))
    if (length(all_rasters) == 0) {
      safe_log("No valid rasters to merge", "WARNING")
      return(NULL)
    }
    # Merge all rasters (handles different extents and grids automatically)
    if (length(all_rasters) == 1) {
      mosaic <- all_rasters[[1]]
      safe_log("Using single raster after filtering")
    } else {
      # Create max composite: take maximum value at each pixel across all dates
      # This skips clouds (low/zero CI values) in favor of clear pixels from other dates
      mosaic <- tryCatch({
        safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds"))
        # Check if all rasters have identical grids (extent and resolution)
        # This is likely for per-tile mosaics from the same tiling scheme
        reference_raster <- all_rasters[[1]]
        ref_ext <- terra::ext(reference_raster)
        ref_res <- terra::res(reference_raster)
        grids_match <- all(sapply(all_rasters[-1], function(r) {
          isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) &&
          isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6))
        }))
        if (grids_match) {
          # All rasters have matching grids - no cropping/resampling needed!
          safe_log("All rasters have identical grids - stacking directly for max composite")
          raster_collection <- terra::sprc(all_rasters)
          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        } else {
          # Grids don't match - need to crop and resample
          safe_log("Rasters have different grids - cropping and resampling to common extent")
          # Get extent from field boundaries if available, otherwise use raster union
          if (!is.null(field_boundaries)) {
            crop_extent <- terra::ext(field_boundaries)
            safe_log("Using field boundaries extent for consistent area across all dates")
          } else {
            # Use union of all extents (covers all data)
            crop_extent <- terra::ext(all_rasters[[1]])
            for (i in 2:length(all_rasters)) {
              crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]]))
            }
            safe_log("Using raster union extent")
          }
          # Crop all rasters to common extent
          cropped_rasters <- lapply(all_rasters, function(r) {
            terra::crop(r, crop_extent)
          })
          # Resample all cropped rasters to match the first one's grid
          reference_grid <- cropped_rasters[[1]]
          aligned_rasters <- lapply(cropped_rasters, function(r) {
            if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) &&
                isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) {
              return(r) # Already aligned
            }
            # "near" resampling preserves original pixel values (no interpolation)
            terra::resample(r, reference_grid, method = "near")
          })
          # Create max composite using mosaic on aligned rasters
          raster_collection <- terra::sprc(aligned_rasters)
          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        }
        max_mosaic
      }, error = function(e) {
        safe_log(paste("Max composite creation failed:", e$message), "WARNING")
        safe_log("Using first raster only as fallback")
        all_rasters[[1]]
      })
      safe_log(paste("Max composite created - taking clearest pixel at each location"))
    }
    # Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI
    required_bands <- c("Red", "Green", "Blue", "NIR", "CI")
    available_bands <- names(mosaic)
    # Check if all required bands are present
    if (!all(required_bands %in% available_bands)) {
      safe_log(paste("Warning: Not all required bands found. Available:", paste(available_bands, collapse = ", ")), "WARNING")
    }
    # Select only the required bands in the correct order
    if (all(required_bands %in% available_bands)) {
      mosaic <- mosaic[[required_bands]]
      safe_log("Selected Red, Green, Blue, NIR, CI bands")
    } else {
      safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5 (R, G, B, NIR, CI)"), "WARNING")
      if (terra::nlyr(mosaic) > 5) {
        # Keep only first 5 bands as fallback
        mosaic <- terra::subset(mosaic, 1:5)
        safe_log("Keeping only first 5 bands as fallback")
      }
    }
  }
  # Crop/mask to field boundaries if provided
  if (!is.null(field_boundaries)) {
    tryCatch({
      mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE)
      safe_log("Mosaic cropped to field boundaries")
    }, error = function(e) {
      safe_log(paste("Could not crop to field boundaries:", e$message), "WARNING")
      # Return uncropped mosaic
    })
  }
  # Log final mosaic summary
  safe_log(paste("✓ Mosaic created from", mosaic_type, "-", terra::nlyr(mosaic),
                 "bands,", nrow(mosaic), "x", ncol(mosaic), "pixels"))
  return(mosaic)
}
#' Save a mosaic raster to disk
#'
#' Writes the raster to output_dir/file_name, optionally writes the cloud mask
#' stored in the raster's "cloud_mask" attribute, and optionally renders
#' diagnostic plots (CI band, RGB, mask, side-by-side comparison).
#'
#' @param mosaic_raster A SpatRaster object to save
#' @param output_dir Directory to save the output
#' @param file_name Filename for the output raster
#' @param plot_result Whether to create visualizations (default: FALSE)
#' @param save_mask Whether to save cloud masks separately (default: FALSE)
#' @return The file path of the saved raster
#'
save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALSE, save_mask = FALSE) {
  # Guard: refuse to proceed without a raster
  if (is.null(mosaic_raster)) {
    stop("No mosaic raster provided to save")
  }
  # Ensure the destination directory exists, then write the raster
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  target_path <- here::here(output_dir, file_name)
  terra::writeRaster(mosaic_raster, target_path, overwrite = TRUE)
  # Cloud mask may have been attached as an attribute by upstream processing
  mask_layer <- attr(mosaic_raster, "cloud_mask")
  has_mask <- !is.null(mask_layer)
  if (has_mask && save_mask) {
    # Derive the mask filename by inserting _mask before the extension
    mask_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
    mask_path <- here::here(output_dir, mask_name)
    terra::writeRaster(mask_layer, mask_path, overwrite = TRUE)
    safe_log(paste("Cloud/shadow mask saved to:", mask_path))
  } else if (has_mask) {
    safe_log("Cloud mask available but not saved (save_mask = FALSE)")
  }
  if (plot_result) {
    band_names <- names(mosaic_raster)
    rgb_present <- all(c("Red", "Green", "Blue") %in% band_names)
    # CI band plot
    if ("CI" %in% band_names) {
      terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
    }
    # True-color plot
    if (rgb_present) {
      terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
    }
    # Mask plot (red = cloud/shadow, green = clear)
    if (has_mask) {
      terra::plot(mask_layer, main = paste("Cloud/shadow mask", file_name),
                  col = c("red", "green"))
    }
    # Side-by-side RGB vs mask comparison when both are available
    if (rgb_present && has_mask) {
      previous_par <- par(mfrow = c(1, 2))
      terra::plotRGB(mosaic_raster, main = "RGB Image")
      comparison_mask <- mask_layer
      terra::plot(comparison_mask, main = "Cloud/Shadow Mask", col = c("red", "green"))
      par(previous_par)
    }
  }
  safe_log(paste("Mosaic saved to:", target_path))
  return(target_path)
}
#' Create weekly mosaic from pre-split tiles with MAX aggregation
#'
#' This function processes tiles created by Script 01 and processed by Script 02.
#' For each of the 25 tiles independently:
#' 1. Collects that tile from all dates in the range
#' 2. Calculates cloud coverage per date
#' 3. Uses create_mosaic logic to select best dates (cloud-clean preferred)
#' 4. Creates MAX composite for that tile
#' 5. Saves to weekly_tile_max/tile_XX.tif
#'
#' Input: merged_final_tif/[DATE]/[TILE_01.tif, TILE_02.tif, ..., TILE_25.tif]
#' Output: weekly_tile_max/tile_01.tif through tile_25.tif (25 weekly MAX tiles)
#'
#' @param dates List from date_list() containing days_filter vector
#'   (assumed to also carry $week and $year for output naming — TODO confirm)
#' @param merged_final_dir Directory containing processed tiles (merged_final_tif)
#' @param tile_output_dir Directory to save weekly MAX tiles (weekly_tile_max)
#' @param field_boundaries Field boundaries for cloud coverage calculation (optional)
#' @return List of paths to created tile files
#'
create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) {
  safe_log("Starting per-tile mosaic creation with cloud-based date selection...")
  # Create output directory if needed
  dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE)
  # Step 1: Discover all tiles from all dates and group by tile ID
  tile_groups <- list() # Structure: tile_groups$tile_01 = list of files for that tile across dates
  for (date in dates$days_filter) {
    date_dir <- file.path(merged_final_dir, date)
    if (!dir.exists(date_dir)) {
      safe_log(paste("  Skipping date:", date, "- directory not found"), "WARNING")
      next
    }
    tile_files <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE)
    if (length(tile_files) == 0) {
      safe_log(paste("  No tiles found for date:", date), "WARNING")
      next
    }
    # Extract tile ID from each filename (e.g., "2026-01-02_01.tif" → tile ID is "01")
    for (tile_file in tile_files) {
      # Extract tile number from filename
      # NOTE: if the pattern does not match, gsub returns the whole basename
      # unchanged, which then becomes its own group key — assumes all files
      # follow the DATE_NN.tif convention.
      tile_basename <- basename(tile_file)
      tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", tile_basename)
      if (!tile_id %in% names(tile_groups)) {
        tile_groups[[tile_id]] <- list()
      }
      tile_groups[[tile_id]][[length(tile_groups[[tile_id]]) + 1]] <- tile_file
    }
  }
  if (length(tile_groups) == 0) {
    stop("No tiles found in date range")
  }
  safe_log(paste("Found", length(tile_groups), "unique tiles across all dates"))
  # Step 2: Process each tile independently
  created_tiles <- character()
  for (tile_id in names(tile_groups)) {
    tile_files_for_this_id <- unlist(tile_groups[[tile_id]])
    safe_log(paste("Processing tile", tile_id, "with", length(tile_files_for_this_id), "dates"))
    # Step 2a: Calculate cloud coverage for this tile across all dates
    cloud_stats_this_tile <- tryCatch({
      count_cloud_coverage_for_tile(
        tile_files = tile_files_for_this_id,
        field_boundaries = field_boundaries
      )
    }, error = function(e) {
      safe_log(paste("  Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING")
      NULL
    })
    if (is.null(cloud_stats_this_tile) || nrow(cloud_stats_this_tile) == 0) {
      safe_log(paste("  No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING")
      cloud_stats_this_tile <- NULL
    }
    # Step 2b: Create MAX mosaic for this tile using create_mosaic logic
    tile_mosaic <- tryCatch({
      create_mosaic(
        tif_files = tile_files_for_this_id,
        cloud_coverage_stats = cloud_stats_this_tile,
        field_boundaries = NULL  # Don't crop individual tiles
      )
    }, error = function(e) {
      safe_log(paste("  Error creating mosaic for tile", tile_id, "-", e$message), "WARNING")
      NULL
    })
    if (is.null(tile_mosaic)) {
      safe_log(paste("  Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING")
      next
    }
    # DEBUG: Check mosaic content before saving
    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "dimensions:", nrow(tile_mosaic), "x", ncol(tile_mosaic)))
    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "bands:", terra::nlyr(tile_mosaic)))
    # Check first band values
    # NOTE: global min/max forces a full read of the band — debug-only cost
    band1 <- tile_mosaic[[1]]
    band1_min <- terra::global(band1, fun = "min", na.rm = TRUE)$min
    band1_max <- terra::global(band1, fun = "max", na.rm = TRUE)$max
    safe_log(paste("  DEBUG: Band 1 MIN=", round(band1_min, 2), "MAX=", round(band1_max, 2)))
    # Step 2c: Save this tile's weekly MAX mosaic
    # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif for week 2, 2026, tile 1)
    tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_",
                            sprintf("%02d", as.integer(tile_id)), ".tif")
    tile_output_path <- file.path(tile_output_dir, tile_filename)
    tryCatch({
      terra::writeRaster(tile_mosaic, tile_output_path, overwrite = TRUE)
      safe_log(paste("  ✓ Saved tile", tile_id, "to:", tile_filename))
      created_tiles <- c(created_tiles, tile_output_path)
    }, error = function(e) {
      safe_log(paste("  Error saving tile", tile_id, "-", e$message), "WARNING")
    })
  }
  safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir))
  return(created_tiles)
}
#' Calculate cloud coverage for a single tile across multiple dates
#'
#' Helper function for per-tile cloud assessment.
#' Takes tile files from different dates and calculates cloud coverage for each.
#' Cloud coverage is calculated ONLY within field boundaries, so total_pixels
#' varies per tile based on which fields are present in that tile area.
#'
#' @param tile_files Character vector of tile file paths from different dates
#' @param field_boundaries Field boundaries for analysis (required for per-field counting)
#' @return Data frame with cloud stats for each date/tile (filenames are basenames,
#'   matching the comparison used by create_mosaic()), or NULL if no files given
#'
count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) {
  if (length(tile_files) == 0) {
    warning("No tile files provided for cloud coverage calculation")
    return(NULL)
  }
  aggregated_results <- list()
  for (idx in seq_along(tile_files)) {
    tile_file <- tile_files[idx]
    tryCatch({
      # Load the tile
      current_raster <- terra::rast(tile_file)
      # Extract the CI band (last band in 5-band output)
      ci_band <- current_raster[[terra::nlyr(current_raster)]]
      # Calculate cloud coverage within field boundaries
      if (!is.null(field_boundaries)) {
        # Create a reference raster template (same extent/resolution as ci_band, but independent of its data)
        # This ensures field_mask shows the potential field area even if ci_band is entirely cloudy (all NA)
        ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band),
                                    crs = terra::crs(ci_band))
        terra::values(ref_template) <- 1  # Fill entire raster with 1s
        # Crop and mask to field boundaries: keeps 1 inside fields, NA outside
        # This is independent of ci_band's data - represents the potential field area
        field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE)
        # Count total potential field pixels from the mask (independent of clouds)
        total_pixels <- terra::global(field_mask, fun = "notNA")$notNA
        # Now crop and mask CI band to field boundaries to count actual valid (non-cloud) pixels
        ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE)
        total_notna <- terra::global(ci_masked, fun = "notNA")$notNA
      } else {
        # If no field boundaries provided, use entire tile
        total_notna <- terra::global(ci_band, fun = "notNA")$notNA
        total_pixels <- terra::ncell(ci_band)
      }
      # Cloud coverage percentage (missing = clouds)
      missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
      aggregated_results[[idx]] <- data.frame(
        filename = basename(tile_file),  # Keep full filename: 2026-01-07_03.tif
        notNA = total_notna,
        total_pixels = total_pixels,
        missing_pixels_percentage = missing_pct,
        thres_5perc = as.integer(missing_pct < 5),
        # NOTE: column is named "40perc" but the cutoff is 45% — kept as-is
        # for backward compatibility with downstream selection logic.
        thres_40perc = as.integer(missing_pct < 45),
        stringsAsFactors = FALSE
      )
    }, error = function(e) {
      safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING")
      # <<- writes into the enclosing function's aggregated_results.
      # Use basename here too (the success branch stores basenames); a full
      # path would never match in create_mosaic()'s basename comparison.
      aggregated_results[[idx]] <<- data.frame(
        filename = basename(tile_file),
        notNA = NA_real_,
        total_pixels = NA_real_,
        missing_pixels_percentage = 100,
        thres_5perc = 0,
        thres_40perc = 0,
        stringsAsFactors = FALSE
      )
    })
  }
  # Combine results
  aggregated_df <- if (length(aggregated_results) > 0) {
    do.call(rbind, aggregated_results)
  } else {
    data.frame()
  }
  return(aggregated_df)
}

View file

@ -1,28 +1,55 @@
# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION)
# ============================================================================
# UNIFIED KPI CALCULATION SCRIPT
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
# ============================================================================
# PURPOSE:
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
# field uniformity, growth trends (4-week and 8-week), area change detection,
# TCH forecasts, stress identification, and weed presence. Generates
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
#
# This script combines:
# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses)
# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators)
# INPUT DATA:
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
# - Source 3: Historical field stats (RDS from previous weeks)
#
# FEATURES:
# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.)
# - Farm-level KPI calculation (6 metrics for executive overview)
# - Parallel processing (tile-aware, 1000+ fields supported)
# - Comprehensive Excel + RDS + CSV exports (21 columns per spec)
# - Test mode for development
# CRITICAL INTEGRATIONS:
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/output/
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
#
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
# Columns: field, imminent_prob, detected_prob, week, year
# [✓] LEFT JOIN to field_analysis_df by field
# [✓] Use actual harvest probability data instead of placeholder
# USAGE:
# Rscript 80_calculate_kpis.R [project] [week] [year]
#
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
# [✓] Load harvest.xlsx with planting_date (season_start)
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
# - week: ISO week number (numeric, 1-53, default current week)
# - year: ISO year (numeric, default current year)
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - uses 80_utils_cane_supply.R (placeholder)
# - agronomic_support (AURA): Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: Weekly mosaic TIFFs (Script 40 output)
# - Data directories: weekly_mosaic/, output/ (created if missing)
#
# NOTES:
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
# - Exports: 21-column output with field-level metrics, phase, status, alerts
# - Supports 4-week and 8-week trend analysis from historical RDS cache
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
#
#
# ============================================================================
# [✓] Extract planting dates per field
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
#
@ -96,48 +123,77 @@ WEEKS_FOR_CV_TREND_LONG <- 8
# ============================================================================
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(readxl)
library(writexl)
library(purrr)
library(furrr)
library(future)
library(caret)
library(CAST)
library(randomForest)
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Spatial data handling
library(sf) # For reading/manipulating field boundaries (GeoJSON)
library(terra) # For raster operations (reading mosaic TIFFs)
# Data manipulation
library(dplyr) # For data wrangling (filter, mutate, group_by, summarize)
library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather)
library(lubridate) # For date/time operations (week extraction, date arithmetic)
# File I/O
library(readr) # For reading CSV files (harvest predictions from Python)
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
library(writexl) # For writing Excel outputs (KPI summary tables)
library(progress) # For progress bars during field processing
# ML/Analysis (optional - only for harvest model inference)
tryCatch({
library(torch)
library(torch) # For PyTorch model inference (harvest readiness prediction)
}, error = function(e) {
message("Note: torch package not available - harvest model inference will be skipped")
})
})
# ============================================================================
# LOAD CONFIGURATION - MUST BE DONE FIRST
# ============================================================================
# Parameters must be loaded early to determine client type and paths
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Get client configuration from global project setup
# NOTE: This cannot be done until parameters_project.R is sourced
# We determine client_type from the current project_dir (if running in main() context)
# For now, set a placeholder that will be overridden in main()
if (exists("project_dir", envir = .GlobalEnv)) {
temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
} else {
temp_client_type <- "cane_supply" # Safe default
}
temp_client_config <- get_client_kpi_config(temp_client_type)
# ============================================================================
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# ============================================================================
# All clients use the common utilities (shared statistical functions, reporting)
tryCatch({
source(here("r_app", "80_weekly_stats_utils.R"))
source(here("r_app", "80_utils_common.R"))
}, error = function(e) {
stop("Error loading 80_weekly_stats_utils.R: ", e$message)
stop("Error loading 80_utils_common.R: ", e$message)
})
# Load both client-specific utilities (functions will be available for both workflows)
# This avoids needing to determine client type at startup time
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
tryCatch({
source(here("r_app", "80_utils_agronomic_support.R"))
}, error = function(e) {
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
})
tryCatch({
source(here("r_app", "80_report_building_utils.R"))
source(here("r_app", "80_utils_cane_supply.R"))
}, error = function(e) {
stop("Error loading 80_report_building_utils.R: ", e$message)
})
tryCatch({
source(here("r_app", "kpi_utils.R"))
}, error = function(e) {
stop("Error loading kpi_utils.R: ", e$message)
stop("Error loading 80_utils_cane_supply.R: ", e$message)
})
# ============================================================================
@ -176,9 +232,6 @@ STATUS_TRIGGERS <- data.frame(
)
# ============================================================================
# MAIN
# ============================================================================
main <- function() {
# Parse command-line arguments
@ -187,7 +240,8 @@ main <- function() {
# end_date (arg 1)
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
# Parse date explicitly in YYYY-MM-DD format from command line
as.Date(args[1], format = "%Y-%m-%d")
} else if (exists("end_date", envir = .GlobalEnv)) {
global_date <- get("end_date", envir = .GlobalEnv)
# Check if it's a valid Date with length > 0
@ -233,7 +287,7 @@ main <- function() {
message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d"))
message("Project:", project_dir)
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
message("Mode: Conditional KPI calculation based on client type")
message("")
# Load configuration and utilities
@ -245,48 +299,151 @@ main <- function() {
stop("Error loading parameters_project.R: ", e$message)
})
# Initialize project directories and configuration
setup <- setup_project_directories(project_dir)
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
client_type <- get_client_type(project_dir)
client_config <- get_client_kpi_config(client_type)
# Assign to global environment so utilities and downstream scripts can access it
assign("client_config", client_config, envir = .GlobalEnv)
message("Client Type:", client_type)
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
# Use centralized paths from setup object (no need for file.path calls)
weekly_mosaic <- setup$weekly_mosaic_dir
daily_vals_dir <- setup$daily_ci_vals_dir
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
}, error = function(e) {
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
})
# ========== PER-FIELD ANALYSIS (SC-64) ==========
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
# ============================================
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
# AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
message("\n", strrep("=", 70))
message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
message(strrep("=", 70))
# Prepare inputs for KPI calculation (already created by setup_project_directories)
reports_dir_kpi <- setup$kpi_reports_dir
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
# Load field boundaries for AURA workflow (use data_dir from setup)
message("\nLoading field boundaries for AURA KPI calculation...")
tryCatch({
boundaries_result <- load_field_boundaries(setup$data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
# Load harvesting data
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
# Call with correct signature
kpi_results <- calculate_all_kpis(
field_boundaries_sf = field_boundaries_sf,
current_week = current_week,
current_year = current_year,
current_mosaic_dir = setup$weekly_mosaic_dir,
harvesting_data = harvesting_data,
ci_rds_path = cumulative_CI_vals_dir,
output_dir = reports_dir_kpi
)
cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")
cat("Summary tables saved for Script 90 integration\n")
cat("Output directory:", reports_dir_kpi, "\n\n")
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
message("\n", strrep("=", 70))
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
reports_dir <- setup$kpi_reports_dir
data_dir <- setup$data_dir
# Continue with existing per-field analysis code below
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
message(strrep("-", 70))
# Calculate ISO week numbers and ISO years using helper from kpi_utils.R
weeks <- calculate_week_numbers(end_date)
current_week <- weeks$current_week
current_iso_year <- weeks$current_iso_year
current_year <- weeks$current_year
previous_week <- weeks$previous_week
previous_iso_year <- weeks$previous_iso_year
previous_year <- weeks$previous_year
message(paste("Week:", current_week, "/ ISO Year:", current_iso_year))
message(paste("Week:", current_week, "/ Year (ISO 8601):", year))
# Find tile files - approach from Script 20
message("Finding tile files...")
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, current_iso_year)
# Find per-field weekly mosaics
message("Finding per-field weekly mosaics...")
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
# Detect grid size subdirectory
detected_grid_size <- NA
if (dir.exists(weekly_tile_max)) {
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
detected_grid_size <- grid_patterns[1]
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
message(paste(" Using grid-size subdirectory:", detected_grid_size))
if (!dir.exists(weekly_mosaic)) {
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
"\nScript 40 (mosaic creation) must be run first."))
}
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
if (length(field_dirs) == 0) {
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
}
# Verify we have mosaics for this week
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
per_field_files <- c()
for (field in field_dirs) {
field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
}
}
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
if (length(tile_files) == 0) {
stop(paste("No tile files found for week", current_week, current_iso_year, "in", mosaic_dir))
if (length(per_field_files) == 0) {
stop(paste("ERROR: No mosaics found for week", current_week, "year", year,
"\nExpected pattern:", single_file_pattern,
"\nChecked:", weekly_mosaic))
}
message(paste(" Found", length(tile_files), "tiles"))
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
mosaic_mode <- "per-field"
mosaic_dir <- weekly_mosaic
# Load field boundaries
tryCatch({
@ -316,10 +473,11 @@ main <- function() {
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir,
historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf)
field_boundaries_sf = field_boundaries_sf,
daily_vals_dir = daily_vals_dir)
# Load harvest.xlsx for planting dates (season_start)
message("\nLoading harvest data from harvest.xlsx for planting dates...")
@ -353,14 +511,15 @@ main <- function() {
)
}
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
# ============================================================================
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
# ============================================================================
# Build per-field configuration
message("\nPreparing mosaic configuration for statistics calculation...")
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
# Build tile grid (needed by calculate_field_statistics)
message("\nBuilding tile grid for current week...")
tile_grid <- build_tile_grid(mosaic_dir, current_week, current_iso_year)
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
field_grid <- list(
mosaic_dir = mosaic_dir,
mode = "per-field"
)
message("\nUsing modular RDS-based approach for weekly statistics...")
@ -371,7 +530,7 @@ main <- function() {
year = current_iso_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = tile_grid$mosaic_dir,
mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = end_date
)
@ -386,10 +545,10 @@ main <- function() {
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = previous_iso_year,
year = previous_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = tile_grid$mosaic_dir,
mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = prev_report_date
)
@ -409,9 +568,11 @@ main <- function() {
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# Load weekly harvest probabilities from script 31 (if available)
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_iso_year))
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
@ -854,6 +1015,14 @@ main <- function() {
cat(" - Per-field data exported\n")
cat(" - Farm-level KPIs calculated\n")
cat(" - All outputs in:", reports_dir, "\n\n")
} else {
# Unknown client type - log warning and exit
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
cat("\n⚠ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
}
}
if (sys.nframe() == 0) {

View file

@ -1,258 +0,0 @@
# 80_REPORT_BUILDING_UTILS.R
# ============================================================================
# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT
#
# This file contains reusable functions for:
# - Field analysis summary generation
# - Excel/CSV/RDS export functionality
# - Farm-level KPI aggregation and summary
# - Tile-based KPI extraction (alternative calculation method)
#
# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts
# ============================================================================
# ============================================================================
# SUMMARY GENERATION
# ============================================================================
generate_field_analysis_summary <- function(field_df) {
  # Build the farm-level summary table for the weekly field-analysis report.
  #
  # Args:
  #   field_df: data frame with one row per field; must contain the columns
  #             Acreage, Phase, Status_trigger and Cloud_category.
  #
  # Returns:
  #   data.frame with columns Category and Acreage.  Some Acreage entries are
  #   strings ("<n> fields"), so c() coerces the whole column to character --
  #   this matches the intended report layout.
  #
  # Refactor notes: the 16 near-identical sum(field_df$Acreage[...]) lines are
  # collapsed into the sum_acreage() helper; the unused local `total_fields`
  # from the previous version is dropped.
  message("Generating summary statistics...")

  # Total acreage over the rows selected by a logical mask.  NA mask entries
  # select NA acreage values, which na.rm = TRUE then drops (same semantics
  # as the original expanded code).  Rounded to 2 decimals for presentation.
  sum_acreage <- function(mask) {
    round(sum(field_df$Acreage[mask], na.rm = TRUE), 2)
  }
  # Number of fields in a given cloud-coverage category, as "<n> fields".
  field_count_label <- function(category) {
    paste0(sum(field_df$Cloud_category == category, na.rm = TRUE), " fields")
  }

  summary_df <- data.frame(
    Category = c(
      "--- PHASE DISTRIBUTION ---",
      "Germination",
      "Tillering",
      "Grand Growth",
      "Maturation",
      "Unknown phase",
      "--- STATUS TRIGGERS ---",
      "Harvest ready",
      "Stress detected",
      "Strong recovery",
      "Growth on track",
      "Germination complete",
      "Germination started",
      "No trigger",
      "--- CLOUD COVERAGE (FIELDS) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- CLOUD COVERAGE (ACREAGE) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- TOTAL ---",
      "Total Acreage"
    ),
    Acreage = c(
      NA,
      sum_acreage(field_df$Phase == "Germination"),
      sum_acreage(field_df$Phase == "Tillering"),
      sum_acreage(field_df$Phase == "Grand Growth"),
      sum_acreage(field_df$Phase == "Maturation"),
      sum_acreage(field_df$Phase == "Unknown"),
      NA,
      sum_acreage(field_df$Status_trigger == "harvest_ready"),
      sum_acreage(field_df$Status_trigger == "stress_detected_whole_field"),
      sum_acreage(field_df$Status_trigger == "strong_recovery"),
      sum_acreage(field_df$Status_trigger == "growth_on_track"),
      sum_acreage(field_df$Status_trigger == "germination_complete"),
      sum_acreage(field_df$Status_trigger == "germination_started"),
      sum_acreage(is.na(field_df$Status_trigger)),
      NA,
      field_count_label("Clear view"),
      field_count_label("Partial coverage"),
      field_count_label("No image available"),
      NA,
      sum_acreage(field_df$Cloud_category == "Clear view"),
      sum_acreage(field_df$Cloud_category == "Partial coverage"),
      sum_acreage(field_df$Cloud_category == "No image available"),
      NA,
      sum_acreage(TRUE)  # TRUE recycles over all rows: grand total
    ),
    stringsAsFactors = FALSE
  )
  return(summary_df)
}
# ============================================================================
# EXPORT FUNCTIONS
# ============================================================================
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
  # Export the per-field analysis in three formats:
  #   - Excel workbook ("Field Data" sheet, optional "Summary" sheet)
  #   - RDS bundle consumed by the downstream report builder
  #   - CSV of the field table
  # Returns a named list with the three output paths.
  message("Exporting per-field analysis to Excel, CSV, and RDS...")

  # Round every numeric column to 2 decimals for presentation.
  round_numeric <- function(df) {
    df %>% mutate(across(where(is.numeric), ~ round(., 2)))
  }
  fields_rounded <- round_numeric(field_df)
  # summary_df is optional (may be NULL in some workflows).
  summary_rounded <- if (is.null(summary_df)) NULL else round_numeric(summary_df)

  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
  }

  week_tag <- sprintf("%02d_%d", current_week, year)

  # --- Excel workbook --------------------------------------------------
  excel_path <- file.path(output_subdir,
                          paste0(project_dir, "_field_analysis_week", week_tag, ".xlsx"))
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
  workbook_sheets <- list("Field Data" = fields_rounded)
  if (!is.null(summary_rounded)) {
    workbook_sheets[["Summary"]] <- summary_rounded
  }
  write_xlsx(workbook_sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))

  # --- RDS bundle ------------------------------------------------------
  kpi_bundle <- list(
    field_analysis = fields_rounded,
    field_analysis_summary = summary_rounded,
    metadata = list(
      current_week = current_week,
      year = year,
      project = project_dir,
      created_at = Sys.time()
    )
  )
  rds_path <- file.path(reports_dir, "kpis",
                        paste0(project_dir, "_kpi_summary_tables_week", week_tag, ".rds"))
  saveRDS(kpi_bundle, rds_path)
  message(paste("✓ Field analysis RDS exported to:", rds_path))

  # --- CSV -------------------------------------------------------------
  csv_path <- file.path(output_subdir,
                        paste0(project_dir, "_field_analysis_week", week_tag, ".csv"))
  write_csv(fields_rounded, csv_path)
  message(paste("✓ Field analysis CSV exported to:", csv_path))

  return(list(excel = excel_path, rds = rds_path, csv = csv_path))
}
# ============================================================================
# TILE-BASED KPI EXTRACTION (Alternative calculation method)
# ============================================================================
# [COMMENTED OUT / UNUSED - kept for reference]
# These functions provide tile-based extraction as an alternative to field_statistics approach
# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R
# Uncomment if parallel processing of tiles is needed in future
# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) {
# message("Calculating field-level KPI statistics from tiles...")
#
# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE)
#
# if (length(tile_files) == 0) {
# message("No tiles found for week", week_num, year)
# return(NULL)
# }
#
# message(paste("Processing", length(tile_files), "tiles in parallel..."))
#
# field_kpi_list <- furrr::future_map(
# tile_files,
# ~ process_single_kpi_tile(
# tile_file = .,
# field_boundaries_sf = field_boundaries_sf,
# tile_grid = tile_grid
# ),
# .progress = TRUE,
# .options = furrr::furrr_options(seed = TRUE)
# )
#
# field_kpi_stats <- dplyr::bind_rows(field_kpi_list)
#
# if (nrow(field_kpi_stats) == 0) {
# message(" No KPI data extracted from tiles")
# return(NULL)
# }
#
# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields"))
# return(field_kpi_stats)
# }
# process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) {
# # Helper function for calculate_field_kpis_from_tiles
# tryCatch({
# tile_basename <- basename(tile_file)
# tile_raster <- terra::rast(tile_file)
# ci_band <- tile_raster[[1]]
#
# field_bbox <- sf::st_bbox(field_boundaries_sf)
# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
#
# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
#
# tile_results <- data.frame()
# tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename))
#
# for (field_idx in seq_len(nrow(field_boundaries_sf))) {
# field_id <- field_boundaries_sf$field[field_idx]
# mean_ci <- extracted_vals[field_idx, 2]
#
# if (is.na(mean_ci)) {
# next
# }
#
# tile_results <- rbind(tile_results, data.frame(
# field = field_id,
# tile_id = tile_id_match,
# tile_file = tile_basename,
# mean_ci = round(mean_ci, 4),
# stringsAsFactors = FALSE
# ))
# }
#
# return(tile_results)
#
# }, error = function(e) {
# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message))
# return(data.frame())
# })
# }
# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf,
# harvesting_data, cumulative_CI_vals_dir,
# weekly_CI_mosaic, reports_dir, current_week, year,
# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") {
# # Farm-level KPI calculation using tile-based extraction (alternative approach)
# # [Implementation kept as reference for alternative calculation method]
# }

View file

@ -0,0 +1,666 @@
# 80_UTILS_AGRONOMIC_SUPPORT.R
# ============================================================================
# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support)
#
# Contains all 6 AURA KPI calculation functions and helpers:
# - Field uniformity KPI (CV-based, spatial autocorrelation)
# - Area change KPI (week-over-week CI changes)
# - TCH forecasted KPI (tonnage projections from harvest data)
# - Growth decline KPI (trend analysis)
# - Weed presence KPI (field fragmentation detection)
# - Gap filling KPI (interpolation quality)
# - KPI reporting (summary tables, field details, text interpretation)
# - KPI export (Excel, RDS, data export)
#
# Orchestrator: calculate_all_kpis()
# Dependencies: 00_common_utils.R (safe_log), sourced from common
# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support")
# ============================================================================
library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)
library(spdep)
library(caret)
library(CAST)
# ============================================================================
# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R)
# ============================================================================
# The following helper functions have been moved to 80_utils_common.R:
# - calculate_cv()
# - calculate_change_percentages()
# - calculate_spatial_autocorrelation()
# - extract_ci_values()
# - calculate_week_numbers()
# - load_field_ci_raster()
# - load_weekly_ci_mosaic()
# - prepare_predictions()
#
# These are now sourced from common utils and shared by all client types.
# ============================================================================
#' Prepare harvest predictions and ensure proper alignment with field data
#'
#' Applies a per-scenario multiplier to each field's mean CI and stores the
#' result as tch_forecasted.  Returns NULL when either input is missing or
#' when the computation errors (the error is logged via message()).
#'
#' NOTE(review): the section header above lists prepare_predictions() among
#' the helpers moved to 80_utils_common.R, yet it is redefined here -- this
#' copy masks the common one when this file is sourced later; confirm which
#' definition should win.
prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") {
  # Both the model and the field table are required.
  if (is.null(harvest_model) || is.null(field_data)) {
    return(NULL)
  }

  tryCatch({
    # Scenario multiplier; any unrecognized scenario falls back to 1.0.
    multiplier <- switch(scenario,
                         "pessimistic" = 0.85,
                         "realistic"   = 1.0,
                         "optimistic"  = 1.15,
                         1.0)
    field_data %>%
      mutate(tch_forecasted = field_data$mean_ci * multiplier)
  }, error = function(e) {
    message(paste("Error preparing predictions:", e$message))
    NULL
  })
}
# ============================================================================
# AURA KPI CALCULATION FUNCTIONS (6 KPIS)
# ============================================================================
#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation
#'
#' Measures how uniform crop development is across the field.
#' Low CV + high positive Moran's I = excellent uniformity.
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param ci_band Raster band with CI values; when NULL, Moran's I is skipped
#'   and a neutral 0.5 is used for the autocorrelation share of the score
#'
#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score,
#'   interpretation (one row per field)
calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) {
  n_fields <- nrow(field_boundaries_sf)

  # Preserve the column prototype for the zero-field case.
  if (n_fields == 0) {
    return(data.frame(
      field_idx = integer(),
      cv_value = numeric(),
      morans_i = numeric(),
      uniformity_score = numeric(),
      interpretation = character(),
      stringsAsFactors = FALSE
    ))
  }

  # Collect one row per field and bind once at the end; the previous
  # rbind-inside-the-loop pattern was accidentally O(n^2).
  rows <- vector("list", n_fields)

  for (field_idx in seq_len(n_fields)) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]

    # No pixels extracted for this field: emit an all-NA row.
    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
      rows[[field_idx]] <- data.frame(
        field_idx = field_idx,
        cv_value = NA_real_,
        morans_i = NA_real_,
        uniformity_score = NA_real_,
        interpretation = "No data",
        stringsAsFactors = FALSE
      )
      next
    }

    cv_val <- calculate_cv(ci_pixels)

    # Moran's I is only computed when a raster band was supplied; the helper
    # may return either a bare value or a list with a $morans_i element.
    morans_i <- NA_real_
    if (!is.null(ci_band)) {
      morans_result <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ])
      morans_i <- if (is.list(morans_result)) morans_result$morans_i else morans_result
    }

    # Normalize CV to [0, 1] (0.3 = saturation threshold), inverted so a
    # lower CV yields a higher score.  NA cv_val propagates to an NA score.
    cv_score <- 1 - min(cv_val / 0.3, 1)
    # Shift Moran's I from [-1, 1] onto [0, 1]; neutral 0.5 when unavailable.
    morans_normalized <- if (!is.na(morans_i)) (morans_i + 1) / 2 else 0.5
    # Weighted blend: uniformity is driven mostly by the CV component.
    uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized

    # Qualitative banding of the raw CV value.
    interpretation <- if (is.na(cv_val)) {
      "No data"
    } else if (cv_val < 0.08) {
      "Excellent uniformity"
    } else if (cv_val < 0.15) {
      "Good uniformity"
    } else if (cv_val < 0.25) {
      "Acceptable uniformity"
    } else if (cv_val < 0.4) {
      "Poor uniformity"
    } else {
      "Very poor uniformity"
    }

    rows[[field_idx]] <- data.frame(
      field_idx = field_idx,
      cv_value = cv_val,
      morans_i = morans_i,
      uniformity_score = round(uniformity_score, 3),
      interpretation = interpretation,
      stringsAsFactors = FALSE
    )
  }

  return(do.call(rbind, rows))
}
#' KPI 2: Calculate area change metric (week-over-week CI changes)
#'
#' Tracks the percentage change in CI between current and previous week
#' and attaches a qualitative growth label per field.
#'
#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci)
#' @param previous_stats Previous week field statistics
#'
#' @return Data frame with field-level CI changes and an interpretation column
calculate_area_change_kpi <- function(current_stats, previous_stats) {
  # Raw percentage changes come from the shared helper.
  result <- calculate_change_percentages(current_stats, previous_stats)

  # Map one percentage change onto its qualitative growth label.
  label_change <- function(change) {
    if (is.na(change)) return("No previous data")
    if (change > 15) return("Rapid growth")
    if (change > 5) return("Positive growth")
    if (change > -5) return("Stable")
    if (change > -15) return("Declining")
    "Rapid decline"
  }

  result$interpretation <- NA_character_
  for (row in seq_len(nrow(result))) {
    result$interpretation[row] <- label_change(result$mean_ci_pct_change[row])
  }
  return(result)
}
#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare)
#'
#' Projects harvest tonnage from the current mean CI with the simplified
#' placeholder model TCH = 50 + CI * 10 and a fixed +/-15% confidence band.
#' harvesting_data and field_boundaries_sf are accepted for interface
#' compatibility but are not used by this placeholder model.
#'
#' @param field_statistics Current field statistics (field_idx, mean_ci)
#' @param harvesting_data Historical harvest data (with yield observations)
#' @param field_boundaries_sf Field geometries
#'
#' @return Data frame with field-level TCH forecasts
calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) {
  result <- data.frame(
    field_idx = field_statistics$field_idx,
    mean_ci = field_statistics$mean_ci,
    tch_forecasted = NA_real_,
    tch_lower_bound = NA_real_,
    tch_upper_bound = NA_real_,
    confidence = NA_character_,
    stringsAsFactors = FALSE
  )

  # Fields without a CI reading keep NA forecasts and get "No data".
  has_ci <- !is.na(result$mean_ci)

  # Simplified linear model, vectorized over all fields:
  # TCH = 50 + (CI * 10); production use should include more variables.
  point_estimate <- 50 + (result$mean_ci * 10)

  result$tch_forecasted[has_ci] <- round(point_estimate[has_ci], 2)
  result$tch_lower_bound[has_ci] <- round(point_estimate[has_ci] * 0.85, 2)
  result$tch_upper_bound[has_ci] <- round(point_estimate[has_ci] * 1.15, 2)
  result$confidence[has_ci] <- "Medium"
  result$confidence[!has_ci] <- "No data"

  return(result)
}
#' KPI 4: Calculate growth decline indicator
#'
#' Identifies fields with a negative growth trajectory by fitting an
#' ordinary-least-squares line through each field's recent CI values.
#'
#' @param ci_values_list List of CI values for each field (multiple weeks)
#'
#' @return Data frame with field-level decline indicators
calculate_growth_decline_kpi <- function(ci_values_list) {
  n_fields <- length(ci_values_list)
  result <- data.frame(
    field_idx = seq_len(n_fields),
    four_week_trend = NA_real_,
    trend_interpretation = NA_character_,
    decline_severity = NA_character_,
    stringsAsFactors = FALSE
  )

  # Map a regression slope onto c(interpretation, severity).
  classify_slope <- function(slope) {
    if (slope > 0.1) return(c("Strong growth", "None"))
    if (slope > 0) return(c("Weak growth", "None"))
    if (slope > -0.1) return(c("Slight decline", "Low"))
    if (slope > -0.3) return(c("Moderate decline", "Medium"))
    c("Strong decline", "High")
  }

  for (idx in seq_len(n_fields)) {
    observations <- ci_values_list[[idx]]
    if (!is.null(observations)) {
      observations <- observations[!is.na(observations)]
    }

    # A trend line needs at least two non-NA observations.
    if (is.null(observations) || length(observations) < 2) {
      result$trend_interpretation[idx] <- "Insufficient data"
      next
    }

    # OLS slope of CI against the week index.
    weeks <- seq_along(observations)
    slope <- as.numeric(coef(lm(observations ~ weeks))["weeks"])

    # Stored value is rounded; classification uses the raw slope.
    result$four_week_trend[idx] <- round(slope, 3)
    labels <- classify_slope(slope)
    result$trend_interpretation[idx] <- labels[1]
    result$decline_severity[idx] <- labels[2]
  }

  return(result)
}
#' KPI 5: Calculate weed presence indicator
#'
#' Detects field fragmentation/patchiness (potential weed/pest pressure)
#' from pixel-level CI variability combined with the share of low-CI pixels.
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#'
#' @return Data frame with fragmentation indicators
calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
  n_fields <- length(ci_pixels_by_field)
  result <- data.frame(
    field_idx = seq_len(n_fields),
    cv_value = NA_real_,
    low_ci_percent = NA_real_,
    fragmentation_index = NA_real_,
    weed_pressure_risk = NA_character_,
    stringsAsFactors = FALSE
  )

  # Translate a fragmentation index into a qualitative risk band.
  risk_band <- function(fragmentation) {
    if (is.na(fragmentation)) return("No data")
    if (fragmentation > 0.15) return("High")
    if (fragmentation > 0.08) return("Medium")
    if (fragmentation > 0.04) return("Low")
    "Minimal"
  }

  for (idx in seq_len(n_fields)) {
    pixels <- ci_pixels_by_field[[idx]]
    if (!is.null(pixels)) {
      pixels <- pixels[!is.na(pixels)]
    }

    # Nothing usable for this field.
    if (is.null(pixels) || length(pixels) == 0) {
      result$weed_pressure_risk[idx] <- "No data"
      next
    }

    cv_val <- calculate_cv(pixels)
    # Share of pixels with CI below 1.5 (bare / weedy patches).
    low_ci_pct <- sum(pixels < 1.5) / length(pixels) * 100
    # Fragmentation: variability weighted by the low-CI share.
    fragmentation <- cv_val * low_ci_pct / 100

    result$cv_value[idx] <- cv_val
    result$low_ci_percent[idx] <- round(low_ci_pct, 2)
    result$fragmentation_index[idx] <- round(fragmentation, 3)
    result$weed_pressure_risk[idx] <- risk_band(fragmentation)
  }

  return(result)
}
#' KPI 6: Calculate gap filling quality (data interpolation success)
#'
#' Measures how well cloud/missing data was interpolated during growth model.
#'
#' @param ci_rds_path Path to combined CI RDS file, or a directory containing
#'   an "All_pivots*.rds" file (the first match is used)
#'
#' @return Data frame with gap-filling quality metrics, or NULL when no
#'   usable RDS file can be located or read
#'
#' NOTE(review): na_percent_post_interpolation is initialized but never
#' populated -- confirm whether post-interpolation data should be loaded here.
calculate_gap_filling_kpi <- function(ci_rds_path) {
  # Guard: must be a non-empty character path.
  if (is.null(ci_rds_path) || !is.character(ci_rds_path) || length(ci_rds_path) == 0) {
    return(NULL)
  }

  # Accept a directory: use the first cumulative "All_pivots*.rds" inside it.
  if (dir.exists(ci_rds_path)) {
    candidates <- list.files(ci_rds_path, pattern = "^All_pivots.*\\.rds$", full.names = TRUE)
    if (length(candidates) == 0) {
      return(NULL)
    }
    ci_rds_path <- candidates[1]
  }

  if (!file.exists(ci_rds_path)) {
    return(NULL)
  }

  tryCatch({
    # Wide matrix: fields x weeks; NA cells = missing data before
    # interpolation (gap filling happens during the growth-model stage).
    ci_matrix <- readRDS(ci_rds_path)

    # Qualitative banding of the pre-interpolation NA percentage.
    success_band <- function(na_pct) {
      if (na_pct == 0) return("No gaps (100% data)")
      if (na_pct < 10) return("Excellent")
      if (na_pct < 25) return("Good")
      if (na_pct < 40) return("Fair")
      "Poor"
    }

    na_pct_by_field <- vapply(
      seq_len(nrow(ci_matrix)),
      function(i) sum(is.na(ci_matrix[i, ])) / ncol(ci_matrix) * 100,
      numeric(1)
    )

    data.frame(
      field_idx = seq_len(nrow(ci_matrix)),
      na_percent_pre_interpolation = round(na_pct_by_field, 2),
      na_percent_post_interpolation = NA_real_,
      gap_filling_success = vapply(na_pct_by_field, success_band, character(1)),
      stringsAsFactors = FALSE
    )
  }, error = function(e) {
    message(paste("Error calculating gap filling KPI:", e$message))
    NULL
  })
}
# ============================================================================
# KPI ORCHESTRATOR AND REPORTING
# ============================================================================
#' Create summary tables for all 6 KPIs
#'
#' Reduces each KPI result to its reporting columns.  The gap_filling entry
#' stays present (named NULL) when that KPI was not computed.
#'
#' @param all_kpis List containing results from all 6 KPI functions
#'
#' @return List of summary data frames ready for reporting
create_summary_tables <- function(all_kpis) {
  # gap_filling is the only optional KPI; keep a named NULL slot for it.
  gap_tbl <- NULL
  if (!is.null(all_kpis$gap_filling)) {
    gap_tbl <- select(all_kpis$gap_filling,
                      field_idx, na_percent_pre_interpolation, gap_filling_success)
  }

  return(list(
    uniformity = select(all_kpis$uniformity,
                        field_idx, cv_value, morans_i, uniformity_score, interpretation),
    area_change = select(all_kpis$area_change,
                         field_idx, mean_ci_pct_change, interpretation),
    tch_forecast = select(all_kpis$tch_forecasted,
                          field_idx, mean_ci, tch_forecasted,
                          tch_lower_bound, tch_upper_bound, confidence),
    growth_decline = select(all_kpis$growth_decline,
                            field_idx, four_week_trend, trend_interpretation, decline_severity),
    weed_pressure = select(all_kpis$weed_presence,
                           field_idx, fragmentation_index, weed_pressure_risk),
    gap_filling = gap_tbl
  ))
}
#' Create detailed field-by-field KPI report
#'
#' Attaches one selected column set per KPI to the field table, joining on
#' \code{field_idx}.
#'
#' @param field_df Data frame with field identifiers and acreage
#' @param all_kpis List with all KPI results
#' @param field_boundaries_sf SF object with field boundaries (currently not
#'   used by this function; kept for interface stability)
#'
#' @return Data frame with one row per field and one column set per KPI
create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) {
  # Per-KPI column subsets to attach to the field table
  kpi_columns <- list(
    all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation),
    all_kpis$area_change %>% select(field_idx, mean_ci_pct_change),
    all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted),
    all_kpis$growth_decline %>% select(field_idx, decline_severity),
    all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk)
  )
  # Left-join each KPI subset onto the accumulating field table in turn
  Reduce(
    function(acc, kpi_df) left_join(acc, kpi_df, by = "field_idx"),
    kpi_columns,
    init = field_df
  )
}
#' Generate KPI text interpretation for inclusion in Word report
#'
#' Concatenates the per-field interpretation strings of the uniformity,
#' growth-decline and weed-presence KPIs into one Markdown-style summary.
#'
#' @param all_kpis List with all KPI results
#'
#' @return Character string with formatted KPI summary text
create_field_kpi_text <- function(all_kpis) {
  # Join the per-field values of one KPI column into a single line
  collapse_fields <- function(values) paste(values, collapse = "; ")
  sections <- c(
    "## AURA KPI ANALYSIS SUMMARY\n",
    "### Field Uniformity\n",
    collapse_fields(all_kpis$uniformity$interpretation), "\n",
    "### Growth Trends\n",
    collapse_fields(all_kpis$growth_decline$trend_interpretation), "\n",
    "### Weed/Pest Pressure\n",
    collapse_fields(all_kpis$weed_presence$weed_pressure_risk), "\n"
  )
  paste(sections, collapse = "")
}
#' Export detailed KPI data to Excel/RDS
#'
#' Writes one Excel workbook (one sheet per KPI summary) and one RDS file
#' containing the full KPI results.
#'
#' @param all_kpis List with all KPI results (saved verbatim to RDS)
#' @param kpi_summary List with summary tables (written as Excel sheets)
#' @param output_dir Directory for output files (created if missing)
#' @param week Week number (used in output file names)
#' @param year Year (used in output file names)
#'
#' @return List of output file paths (elements: excel, rds)
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
  # Ensure output directory exists
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  # Shared file stem; file.path is used instead of paste0 for portable paths
  file_stem <- sprintf("AURA_KPI_week_%02d_%d", week, year)
  # Export all available KPI tables to a single Excel file.
  # Sheets whose summary is NULL (e.g. gap_filling when no CI RDS was
  # supplied) are dropped rather than written as empty 0x0 sheets.
  sheet_sources <- list(
    "Uniformity" = kpi_summary$uniformity,
    "Area_Change" = kpi_summary$area_change,
    "TCH_Forecast" = kpi_summary$tch_forecast,
    "Growth_Decline" = kpi_summary$growth_decline,
    "Weed_Pressure" = kpi_summary$weed_pressure,
    "Gap_Filling" = kpi_summary$gap_filling
  )
  sheet_sources <- sheet_sources[!vapply(sheet_sources, is.null, logical(1))]
  sheets <- lapply(sheet_sources, as.data.frame)
  excel_file <- file.path(output_dir, paste0(file_stem, ".xlsx"))
  write_xlsx(sheets, excel_file)
  message(paste("✓ AURA KPI data exported to:", excel_file))
  # Also export the full (non-summarized) results to RDS for programmatic access
  rds_file <- file.path(output_dir, paste0(file_stem, ".rds"))
  saveRDS(all_kpis, rds_file)
  message(paste("✓ AURA KPI RDS exported to:", rds_file))
  return(list(excel = excel_file, rds = rds_file))
}
# ============================================================================
# ORCHESTRATOR FUNCTION
# ============================================================================
#' Calculate all 6 AURA KPIs
#'
#' Main entry point for AURA KPI calculation.
#' This function orchestrates the 6 KPI calculations and returns all results.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param current_mosaic_dir Directory containing current week's mosaic
#' @param previous_mosaic_dir Directory containing previous week's mosaic
#'   (optional; when NULL or the mosaic is missing, the area-change KPI is
#'   filled with NA placeholders)
#' @param ci_rds_path Path to combined CI RDS file (optional; forwarded to
#'   the gap-filling KPI)
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for KPI exports
#'
#' @return List with results from all 6 KPI functions (elements: uniformity,
#'   area_change, tch_forecasted, growth_decline, weed_presence, gap_filling)
#'
#' @details
#' This function:
#' 1. Loads current week mosaic and extracts field statistics
#' 2. (Optionally) loads previous week mosaic for comparison metrics
#' 3. Calculates all 6 AURA KPIs
#' 4. Creates summary tables
#' 5. Exports results to Excel/RDS
#'
calculate_all_kpis <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  current_mosaic_dir,
  previous_mosaic_dir = NULL,
  ci_rds_path = NULL,
  harvesting_data = NULL,
  output_dir = NULL
) {
  message("\n============ AURA KPI CALCULATION (6 KPIs) ============")
  # Load current week mosaic (hard requirement: every KPI needs it)
  # NOTE(review): argument order here is (week, year, dir); the cane-supply
  # orchestrator calls load_weekly_ci_mosaic(dir, week, year) — confirm the
  # helper's actual signature, both call sites cannot be right.
  message("Loading current week mosaic...")
  current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)
  if (is.null(current_mosaic)) {
    stop("Could not load current week mosaic")
  }
  # Extract per-field summary statistics and raw per-pixel CI values
  message("Extracting field statistics from current mosaic...")
  current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf)
  # Load previous week mosaic (if available) for the area-change KPI
  previous_stats <- NULL
  if (!is.null(previous_mosaic_dir)) {
    target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
    message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
    previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)
    if (!is.null(previous_mosaic)) {
      previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
    } else {
      message("Previous week mosaic not available - skipping area change KPI")
    }
  }
  # Calculate 6 KPIs
  message("\nCalculating KPI 1: Field Uniformity...")
  uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic)
  message("Calculating KPI 2: Area Change...")
  if (!is.null(previous_stats)) {
    area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats)
  } else {
    # No previous week: emit an all-NA placeholder so downstream summary
    # tables keep a consistent shape (one row per field)
    area_change_kpi <- data.frame(
      field_idx = seq_len(nrow(field_boundaries_sf)),
      mean_ci_pct_change = NA_real_,
      interpretation = rep("No previous data", nrow(field_boundaries_sf))
    )
  }
  message("Calculating KPI 3: TCH Forecasted...")
  tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf)
  message("Calculating KPI 4: Growth Decline...")
  growth_decline_kpi <- calculate_growth_decline_kpi(
    list(ci_pixels_by_field) # Would need historical data for real trend
  )
  message("Calculating KPI 5: Weed Presence...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)
  message("Calculating KPI 6: Gap Filling...")
  gap_filling_kpi <- calculate_gap_filling_kpi(ci_rds_path)
  # Compile results
  all_kpis <- list(
    uniformity = uniformity_kpi,
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    weed_presence = weed_kpi,
    gap_filling = gap_filling_kpi
  )
  # Create summary tables
  kpi_summary <- create_summary_tables(all_kpis)
  # Export
  # NOTE(review): output_dir defaults to NULL but export_kpi_data builds
  # paths from it unconditionally — confirm callers always pass a directory.
  export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year)
  message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year, "\n"))
  return(all_kpis)
}

View file

@ -0,0 +1,210 @@
# 80_UTILS_CANE_SUPPLY.R
# ============================================================================
# CANE SUPPLY CLIENT-SPECIFIC UTILITIES (SCRIPT 80 - CLIENT TYPE: cane_supply)
#
# Contains ANGATA and other cane supply-specific KPI and reporting functions.
#
# Currently, CANE_SUPPLY clients use the common utilities from 80_utils_common.R:
# - Weekly statistics (calculate_field_statistics, calculate_kpi_trends)
# - Field analysis reporting (generate_field_analysis_summary)
# - Excel export (export_field_analysis_excel)
#
# This file is structured to accommodate future ANGATA-specific functionality such as:
# - Custom yield models
# - Harvest readiness criteria
# - Supply chain integration hooks
# - ANGATA-specific alerting and messaging
#
# Orchestrator: (Placeholder - uses common functions)
# Dependencies: 00_common_utils.R, 80_utils_common.R
# Used by: 80_calculate_kpis.R (when client_type == "cane_supply")
# ============================================================================
library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)
# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================
#' Placeholder: ANGATA harvest readiness assessment
#'
#' Future implementation will integrate ANGATA-specific harvest readiness criteria:
#' - Maturation phase detection (CI threshold-based)
#' - Field age tracking (days since planting)
#' - Weather-based ripeness indicators (if available)
#' - Historical yield correlations
#'
#' @param field_ci CI values for the field
#' @param field_age_days Days since planting (accepted but not yet used by
#'   this placeholder implementation)
#'
#' @return Character string with harvest readiness assessment
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
  # Placeholder implementation based on mean CI thresholds only.
  # Real version would additionally check:
  # - Mean CI > 3.5 (maturation threshold)
  # - Age > 350 days
  # - Weekly growth rate < threshold (mature plateau)
  no_usable_data <- is.null(field_ci) || all(is.na(field_ci))
  if (no_usable_data) {
    return("No data available")
  }
  avg_ci <- mean(field_ci, na.rm = TRUE)
  if (avg_ci > 3.5) {
    "Ready for harvest"
  } else if (avg_ci > 2.5) {
    "Approaching harvest readiness"
  } else {
    "Not ready - continue monitoring"
  }
}
#' Placeholder: ANGATA supply chain status flags
#'
#' Future implementation will add supply chain-specific status indicators:
#' - Harvest scheduling readiness
#' - Equipment availability impact
#' - Transportation/logistics flags
#' - Quality parameter flags
#'
#' @param field_analysis Data frame with field analysis results
#'
#' @return Data frame with supply chain status columns (currently the input
#'   unchanged; no columns are added yet)
assess_supply_chain_status <- function(field_analysis) {
  # Placeholder: identity pass-through. The real version would append:
  # - schedule_ready (bool)
  # - harvest_window_days (numeric)
  # - transportation_priority (char)
  # - quality_flags (char)
  field_analysis
}
# ============================================================================
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
# ============================================================================
#' Orchestrate ANGATA weekly field analysis and reporting
#'
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
#' Currently uses common utilities; future versions will add client-specific logic.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param mosaic_dir Directory containing weekly mosaics
#' @param field_boundaries_path Path to field GeoJSON (currently not used by
#'   this function; kept for interface stability)
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for exports; default requires a PROJECT_DIR
#'   variable to be defined by the sourcing environment
#' @param data_dir Base data directory used to locate the combined CI RDS
#'
#' @return List with elements field_analysis, summary and exports, or NULL
#'   when the current week mosaic or the field analysis is unavailable
#'
#' @details
#' This function:
#' 1. Loads weekly mosaic and extracts field statistics
#' 2. Calculates field statistics (using common utilities)
#' 3. Prepares field analysis summary
#' 4. Exports to Excel/CSV/RDS
#' 5. (Future) Applies ANGATA-specific assessments
#'
calculate_field_analysis_cane_supply <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  mosaic_dir,
  field_boundaries_path = NULL,
  harvesting_data = NULL,
  output_dir = file.path(PROJECT_DIR, "output"),
  data_dir = NULL
) {
  message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")
  # Load current week mosaic (required: analysis aborts with NULL without it)
  # NOTE(review): argument order here is (dir, week, year); the AURA KPI
  # orchestrator calls load_weekly_ci_mosaic(week, year, dir) — confirm the
  # helper's actual signature, both call sites cannot be right.
  message("Loading current week mosaic...")
  current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)
  if (is.null(current_mosaic)) {
    warning(paste("Could not load current week mosaic for week", current_week, current_year))
    return(NULL)
  }
  # Extract per-field statistics for the current week
  message("Extracting field statistics from current mosaic...")
  field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  # Load previous week stats for comparison (best-effort: NULL when missing)
  message("Loading historical data for trends...")
  target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
  previous_stats <- NULL
  previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
  if (!is.null(previous_mosaic)) {
    previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
  }
  # Calculate 4-week historical trend
  # NOTE(review): data_dir defaults to NULL, which makes this file.path()
  # produce a zero-length path — confirm callers always pass data_dir.
  message("Calculating field trends...")
  ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")
  field_analysis <- calculate_field_statistics(
    field_stats = field_stats,
    previous_stats = previous_stats,
    week_num = current_week,
    year = current_year,
    ci_rds_path = ci_rds_path,
    field_boundaries_sf = field_boundaries_sf,
    harvesting_data = harvesting_data
  )
  if (is.null(field_analysis)) {
    message("Could not generate field analysis")
    return(NULL)
  }
  # Generate summary
  message("Generating summary statistics...")
  summary_df <- generate_field_analysis_summary(field_analysis)
  # Export
  message("Exporting field analysis...")
  export_paths <- export_field_analysis_excel(
    field_analysis,
    summary_df,
    PROJECT_DIR,
    current_week,
    current_year,
    output_dir
  )
  message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))
  result <- list(
    field_analysis = field_analysis,
    summary = summary_df,
    exports = export_paths
  )
  return(result)
}
# ============================================================================
# FUTURE EXTENSION POINTS
# ============================================================================
# Placeholder for ANGATA-specific utilities that may be added in future:
# - Custom yield models based on ANGATA historical data
# - Field condition thresholds specific to ANGATA growing practices
# - Integration with ANGATA harvest scheduling system
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
# - Cost/benefit analysis for ANGATA operational decisions
# These functions can be added here as ANGATA requirements evolve.

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,7 @@ params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2026-01-22"
data_dir: "aura"
data_dir: "angata"
mail_day: "Wednesday"
borders: FALSE
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
@ -38,20 +38,26 @@ reporting_script <- TRUE
# Load all packages at once with suppressPackageStartupMessages
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(tidyverse)
library(tmap)
library(lubridate)
library(zoo)
library(rsample)
library(caret)
library(randomForest)
library(CAST)
library(knitr)
library(tidyr)
library(flextable)
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Spatial data handling
library(sf) # For reading/manipulating field boundaries (GeoJSON)
library(terra) # For raster operations (reading mosaic TIFFs for visualization)
# Data manipulation
library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization)
library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion)
library(lubridate) # For date/time operations (week extraction, date formatting)
library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling)
# Visualization
library(tmap) # For interactive maps (field boundary visualization)
# Reporting
library(knitr) # For R Markdown document generation (code execution and output)
library(flextable) # For formatted tables in Word output (professional table styling)
library(officer) # For Word document manipulation (custom formatting, headers, footers)
})
# Load custom utility functions
@ -101,10 +107,14 @@ project_dir <- params$data_dir
# Source project parameters with error handling
tryCatch({
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "00_common_utils.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
stop("Error loading project utilities: ", e$message)
})
# Load centralized paths
paths <- setup_project_directories(project_dir)
# Log initial configuration
safe_log("Starting the R Markdown script with KPIs")
safe_log(paste("mail_day params:", params$mail_day))
@ -114,15 +124,14 @@ safe_log(paste("mail_day variable:", mail_day))
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
# Primary expected directory from centralized paths
kpi_data_dir <- paths$kpi_reports_dir
date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Calculate current week from report_date using ISO 8601 week numbering
report_date_obj <- as.Date(report_date)
current_week <- lubridate::isoweek(report_date_obj)
current_iso_year <- lubridate::isoyear(report_date_obj)
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year)
current_week <- as.numeric(format(as.Date(report_date), "%V"))
current_year <- as.numeric(format(as.Date(report_date), "%G"))
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
@ -346,50 +355,17 @@ safe_log(paste("Week range:", week_start, "to", week_end))
```
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
# Load CI index data with error handling
# Load CI quadrant data for field-level analysis
tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
stop("Error loading CI quadrant data: ", e$message)
})
# Get file paths for different weeks using the utility function
tryCatch({
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
# Log the calculated paths
safe_log("Required mosaic paths:")
safe_log(paste("Path to current week:", path_to_week_current))
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
# Validate that files exist
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
# Load raster data with terra functions
CI <- terra::rast(path_to_week_current)$CI
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
# DEBUG: Check which weeks were actually loaded and their data ranges
safe_log(paste("DEBUG - CI (current) range:", paste(terra::minmax(CI)[,1], collapse=" to ")))
safe_log(paste("DEBUG - CI_m1 (week-1) range:", paste(terra::minmax(CI_m1)[,1], collapse=" to ")))
safe_log(paste("DEBUG - CI_m2 (week-2) range:", paste(terra::minmax(CI_m2)[,1], collapse=" to ")))
safe_log(paste("DEBUG - CI_m3 (week-3) range:", paste(terra::minmax(CI_m3)[,1], collapse=" to ")))
}, error = function(e) {
stop("Error loading raster data: ", e$message)
})
# NOTE: Overview maps skipped for this report
# Individual field sections load their own per-field mosaics directly
```
```
```{r compute_benchmarks_once, include=FALSE}
@ -450,7 +426,7 @@ if (exists("summary_tables") && !is.null(summary_tables)) {
## Executive Summary - Key Performance Indicators
```{r combined_kpi_table, echo=FALSE}
```{r combined_kpi_table, echo=FALSE, results='asis'}
# Combine all KPI tables into a single table with standardized column names
display_names <- c(
field_uniformity_summary = "Field Uniformity",
@ -504,7 +480,7 @@ ft
## Field Alerts
```{r field_alerts_table, echo=FALSE}
```{r field_alerts_table, echo=FALSE, results='asis'}
# Generate alerts for all fields
generate_field_alerts <- function(field_details_table) {
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
@ -610,42 +586,13 @@ if (exists("field_details_table") && !is.null(field_details_table)) {
}
```
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
# Calculate difference rasters for comparisons
# When one week has NA values, the difference will also be NA (not zero)
# Initialize placeholders first to ensure they exist
last_week_dif_raster_abs <- NULL
three_week_dif_raster_abs <- NULL
tryCatch({
# Always calculate differences - NA values will propagate naturally
# This way empty weeks (all NA) result in NA differences, not misleading zeros
last_week_dif_raster_abs <- (CI - CI_m1)
three_week_dif_raster_abs <- (CI - CI_m3)
safe_log("Calculated difference rasters (NA values preserved)")
}, error = function(e) {
safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
# Fallback: create NA placeholders if calculation fails
if (is.null(last_week_dif_raster_abs)) {
last_week_dif_raster_abs <- CI * NA
}
if (is.null(three_week_dif_raster_abs)) {
three_week_dif_raster_abs <- CI * NA
}
})
# Final safety check - ensure variables exist in global environment
if (is.null(last_week_dif_raster_abs)) {
last_week_dif_raster_abs <- CI * NA
safe_log("Created NA placeholder for last_week_dif_raster_abs", "WARNING")
}
if (is.null(three_week_dif_raster_abs)) {
three_week_dif_raster_abs <- CI * NA
safe_log("Created NA placeholder for three_week_dif_raster_abs", "WARNING")
```{r data, message=TRUE, warning=TRUE, include=FALSE}
# Verify CI quadrant data is loaded from load_ci_data chunk
if (!exists("CI_quadrant") || is.null(CI_quadrant)) {
stop("CI_quadrant data not available - check load_ci_data chunk")
}
safe_log("CI quadrant data verified for field-level analysis")
```
```
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
@ -665,76 +612,6 @@ tryCatch({
stop("Error loading field boundaries: ", e$message)
})
```
\newpage
## Chlorophyll Index (CI) Overview Map - Current Week
```{r render_ci_overview_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE}
# Create overview chlorophyll index map
tryCatch({
# Choose palette based on colorblind_friendly parameter
ci_palette <- if (colorblind_friendly) "viridis" else "brewer.rd_yl_gn"
# Base shape
map <- tmap::tm_shape(CI, unit = "m")
# Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison)
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = ci_palette,
limits = c(1, 8)),
col.legend = tm_legend(title = "Chlorophyll Index (CI)",
orientation = "landscape",
position = tm_pos_out("center", "bottom")))
# Complete the map with layout and other elements
map <- map +
tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black")
# Print the map
print(map)
}, error = function(e) {
safe_log(paste("Error creating CI overview map:", e$message), "ERROR")
plot(1, type="n", axes=FALSE, xlab="", ylab="")
text(1, 1, "Error creating CI overview map", cex=1.5)
})
```
\newpage
## Weekly Chlorophyll Index Difference Map
```{r render_ci_difference_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE}
# Create chlorophyll index difference map
tryCatch({
# Choose palette based on colorblind_friendly parameter
diff_palette <- if (colorblind_friendly) "plasma" else "brewer.rd_yl_gn"
# Base shape
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m")
# Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale)
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = diff_palette,
midpoint = 0,
limits = c(-3, 3)),
col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
orientation = "landscape",
position = tm_pos_out("center", "bottom")))
# Complete the map with layout and other elements
map <- map +
tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black")
# Print the map
print(map)
}, error = function(e) {
safe_log(paste("Error creating CI difference map:", e$message), "ERROR")
plot(1, type="n", axes=FALSE, xlab="", ylab="")
text(1, 1, "Error creating CI difference map", cex=1.5)
})
```
# Section 2: Field-by-Field Analysis
@ -760,6 +637,23 @@ tryCatch({
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
# Use per-field weekly mosaic directory path from parameters_project.R
weekly_mosaic_per_field_dir <- weekly_CI_mosaic
# Helper to get week/year from a date
get_week_year <- function(date) {
list(
week = as.numeric(format(date, "%V")),
year = as.numeric(format(date, "%G"))
)
}
# Get week/year for current and historical weeks (local to field section)
current_ww <- get_week_year(as.Date(today))
minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1))
minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2))
minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3))
# Generate plots for each field
for(i in seq_along(AllPivots_merged$field)) {
field_name <- AllPivots_merged$field[i]
@ -775,15 +669,71 @@ tryCatch({
cat("\\newpage\n\n")
}
# Call ci_plot with explicit parameters (ci_plot will generate its own header)
# Load per-field mosaics directly for this field
field_CI <- NULL
field_CI_m1 <- NULL
field_CI_m2 <- NULL
field_CI_m3 <- NULL
tryCatch({
# Load per-field mosaic for current week
per_field_path_current <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year
)
if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) {
field_CI <- terra::rast(per_field_path_current)[["CI"]]
}
# Load per-field mosaic for week-1
per_field_path_m1 <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year
)
if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) {
field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]]
}
# Load per-field mosaic for week-2
per_field_path_m2 <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year
)
if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) {
field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]]
}
# Load per-field mosaic for week-3
per_field_path_m3 <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year
)
if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) {
field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]]
}
safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG")
}, error = function(e) {
safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING")
})
# Calculate difference rasters from per-field data (local to this field)
last_week_dif_raster_field <- NULL
three_week_dif_raster_field <- NULL
if (!is.null(field_CI) && !is.null(field_CI_m1)) {
last_week_dif_raster_field <- field_CI - field_CI_m1
}
if (!is.null(field_CI) && !is.null(field_CI_m3)) {
three_week_dif_raster_field <- field_CI - field_CI_m3
}
# Call ci_plot with field-specific rasters
ci_plot(
pivotName = field_name,
field_boundaries = AllPivots0,
current_ci = CI,
ci_minus_1 = CI_m1,
ci_minus_2 = CI_m2,
last_week_diff = last_week_dif_raster_abs,
three_week_diff = three_week_dif_raster_abs,
current_ci = field_CI,
ci_minus_1 = field_CI_m1,
ci_minus_2 = field_CI_m2,
last_week_diff = last_week_dif_raster_field,
three_week_diff = three_week_dif_raster_field,
harvesting_data = harvesting_data,
week = week,
week_minus_1 = week_minus_1,
@ -882,9 +832,9 @@ tryCatch({
The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis.
```{r detailed_field_table, echo=FALSE}
```{r detailed_field_table, echo=FALSE, results='asis'}
# Load CI quadrant data to get field ages
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
# Identify the current season for each field based on report_date
# The current season is the one where the report_date falls within or shortly after the season
@ -1027,7 +977,7 @@ Comparing the current season to these lines helps assess whether crop growth is
\newpage
## Report Metadata
```{r report_metadata, echo=FALSE}
```{r report_metadata, echo=FALSE, results='asis'}
metadata_info <- data.frame(
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
Value = c(

View file

@ -1,584 +0,0 @@
---
params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2025-09-30"
data_dir: "aura"
mail_day: "Wednesday"
borders: FALSE
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
colorblind_friendly: TRUE # use colorblind-friendly palettes (viridis/plasma)
facet_by_season: FALSE # facet CI trend plots by season instead of overlaying
x_axis_unit: "days" # x-axis unit for trend plots: "days" or "weeks"
output:
word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx")
toc: no
editor_options:
chunk_output_type: console
---
```{r setup_parameters, include=FALSE}
# Set up basic report parameters from input values
# (copied into plain variables so later chunks don't reach into `params`)
report_date <- params$report_date  # report anchor date, "YYYY-MM-DD" string
mail_day <- params$mail_day  # weekday name the report is mailed on
borders <- params$borders  # logical: draw field borders on maps
ci_plot_type <- params$ci_plot_type  # "absolute", "cumulative" or "both"
colorblind_friendly <- params$colorblind_friendly  # logical: use viridis/plasma palettes
facet_by_season <- params$facet_by_season  # logical: facet CI trend plots by season
x_axis_unit <- params$x_axis_unit  # "days" or "weeks" on trend plot x-axis
```
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Set flag for reporting scripts to use pivot.geojson instead of pivot_2.geojson
reporting_script <- TRUE
# Load all packages at once with suppressPackageStartupMessages
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(tidyverse)
library(tmap)
library(lubridate)
library(zoo)
library(rsample)
library(caret)
library(randomForest)
library(CAST)
library(knitr)
library(tidyr)
library(flextable)
library(officer)
})
# Load custom utility functions
tryCatch({
source("report_utils.R")
}, error = function(e) {
message(paste("Error loading report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "report_utils.R"))
}, error = function(e) {
stop("Could not load report_utils.R from either location: ", e$message)
})
})
# Classify a field's stress priority from its coefficient of variation (CV)
# and spatial autocorrelation (Moran's I).
# Returns an integer code: 1 = Urgent, 2 = Monitor, 3 = No stress.
get_field_priority_level <- function(cv, morans_i) {
  # Missing inputs default to the lowest priority (no stress).
  if (is.na(cv) || is.na(morans_i)) {
    return(3)
  }
  # High variability is always urgent, regardless of clustering.
  if (cv > 0.15) {
    return(1)
  }
  # Low variability: the degree of spatial clustering decides the level.
  if (cv < 0.1) {
    if (morans_i < 0.7) return(3)        # No stress
    if (morans_i <= 0.9) return(2)       # Monitor (young field with some clustering)
    return(1)                            # Urgent
  }
  # Moderate variability (0.1 <= cv <= 0.15): clustering escalates to urgent.
  if (morans_i < 0.7) 2 else 1
}
```
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Log initial configuration
safe_log("Starting the R Markdown script with KPIs - NO TABLES VERSION")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Calculate current week from report_date using ISO 8601 week numbering
current_week <- as.numeric(format(as.Date(report_date), "%V"))
current_year <- as.numeric(format(as.Date(report_date), "%G"))
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables.rds"),
"kpi_summary_tables.rds",
paste0("kpi_summary_tables_", week_suffix, ".rds"),
paste0("kpi_summary_tables_", date_suffix, ".rds")
)
expected_field_details_names <- c(
paste0(project_dir, "_field_details_", week_suffix, ".rds"),
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
paste0(project_dir, "_field_details.rds"),
"field_details.rds"
)
# Scan `dir` for the first candidate filename that exists on disk.
# Returns the full path of the first match (in candidate order), or NULL
# when the directory is missing or none of the candidates are present.
try_load_from_dir <- function(dir, candidates) {
  if (!dir.exists(dir)) {
    return(NULL)
  }
  paths <- file.path(dir, candidates)
  found <- paths[file.exists(paths)]
  if (length(found) > 0) found[1] else NULL
}
# Try primary directory first
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
if (is.null(summary_file) || is.null(field_details_file)) {
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
# List rds files under laravel_app/storage/app recursively
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
# Try to match by expected names
if (is.null(summary_file)) {
matched <- files[basename(files) %in% expected_summary_names]
if (length(matched) > 0) summary_file <- matched[1]
}
if (is.null(field_details_file)) {
matched2 <- files[basename(files) %in% expected_field_details_names]
if (length(matched2) > 0) field_details_file <- matched2[1]
}
}
# Final checks and load with safe error messages.
# `kpi_files_exist` ends up TRUE only when every KPI file that was located
# could actually be deserialized; the closing log line reports the outcome.
kpi_files_exist <- FALSE
if (!is.null(summary_file) && file.exists(summary_file)) {
  safe_log(paste("Loading KPI summary from:", summary_file))
  summary_tables <- tryCatch(
    readRDS(summary_file),
    error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL }
  )
  if (!is.null(summary_tables)) kpi_files_exist <- TRUE
} else {
  safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
}
if (!is.null(field_details_file) && file.exists(field_details_file)) {
  safe_log(paste("Loading field details from:", field_details_file))
  field_details_table <- tryCatch(
    readRDS(field_details_file),
    error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL }
  )
  # BUG FIX: the original line here was `kpi_files_exist <- kpi_files_exist && TRUE`,
  # a no-op that left kpi_files_exist TRUE even when the field-details RDS
  # failed to deserialize. Combine both load results instead so a corrupt
  # file is reported as a failed KPI load.
  kpi_files_exist <- kpi_files_exist && !is.null(field_details_table)
} else {
  # File absent entirely: warn only; summary-derived kpi_files_exist stands,
  # and downstream chunks separately check exists("field_details_table").
  safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
}
if (kpi_files_exist) {
  safe_log("✓ KPI summary tables loaded successfully")
} else {
  safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
}
#' Generate field-specific KPI summary for display in reports
#'
#' Aggregates the sub-field rows of `field_details_table` into a single
#' field-level line (size, uniformity, yield forecast, gap score, risk
#' levels, mean CI) and returns it as a small-font inline-HTML string.
#' @param field_name Name of the field to summarize
#' @param field_details_table Data frame with field-level KPI details
#' @param CI_quadrant Data frame of cumulative CI values per field/season;
#'   used only to derive the field's age (max DOY of the current season)
#' @return Formatted text string with field KPI summary; on any error a
#'   short fallback string is returned instead of raising
#' NOTE(review): reads `report_date` from the enclosing knit environment
#' rather than taking it as a parameter — confirm it is set before use.
generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadrant) {
tryCatch({
# Get field age from CI quadrant data for the CURRENT SEASON only
# First identify the current season for this field
current_season <- CI_quadrant %>%
filter(field == field_name, Date <= as.Date(report_date)) %>%
group_by(season) %>%
summarise(season_end = max(Date), .groups = 'drop') %>%
filter(season == max(season)) %>%
pull(season)
# Get the most recent DOY from the current season
# NOTE(review): max(..., na.rm = TRUE) returns -Inf (with a warning) when
# the field has no rows for the season; that value falls into the
# `field_age < 240` branch below, which blanks the yield forecast.
field_age <- CI_quadrant %>%
filter(field == field_name, season == current_season) %>%
pull(DOY) %>%
max(na.rm = TRUE)
# Filter data for this specific field
field_data <- field_details_table %>%
filter(Field == field_name)
if (nrow(field_data) == 0) {
return(paste("**Field", field_name, "KPIs:** Data not available"))
}
# Aggregate sub-field data for field-level summary
# For categorical data, take the most common value or highest risk level
field_summary <- field_data %>%
summarise(
field_size = sum(`Field Size (ha)`, na.rm = TRUE),
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
# Forecast is only averaged when the first sub-field row has one;
# otherwise the whole field's forecast is reported as NA.
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
max_gap_score = max(`Gap Score`, na.rm = TRUE),
# case_when picks the worst risk level present across sub-fields.
highest_decline_risk = case_when(
any(`Decline Risk` == "Very-high") ~ "Very-high",
any(`Decline Risk` == "High") ~ "High",
any(`Decline Risk` == "Moderate") ~ "Moderate",
any(`Decline Risk` == "Low") ~ "Low",
TRUE ~ "Unknown"
),
highest_weed_risk = case_when(
any(`Weed Risk` == "High") ~ "High",
any(`Weed Risk` == "Moderate") ~ "Moderate",
any(`Weed Risk` == "Low") ~ "Low",
TRUE ~ "Unknown"
),
avg_mean_ci = mean(`Mean CI`, na.rm = TRUE),
avg_cv = mean(`CV Value`, na.rm = TRUE),
.groups = 'drop'
)
# Apply age-based filtering to yield forecast
# (presumably forecasts are not considered reliable before DOY 240 — confirm)
if (is.na(field_age) || field_age < 240) {
field_summary$avg_yield_forecast <- NA_real_
}
# Format the summary text
yield_text <- if (is.na(field_summary$avg_yield_forecast)) {
"Yield Forecast: NA"
} else {
paste0("Yield Forecast: ", round(field_summary$avg_yield_forecast, 1), " t/ha")
}
kpi_text <- paste0(
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
)
# Wrap in a span so pandoc/Word renders the line at a smaller font size
kpi_text <- paste0("<span style='font-size:10pt'>", kpi_text, "</span>")
return(kpi_text)
}, error = function(e) {
safe_log(paste("Error generating KPI summary for field", field_name, ":", e$message), "ERROR")
return(paste("**Field", field_name, "KPIs:** Error generating summary"))
})
}
```
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
# Set locale for consistent date formatting
Sys.setlocale("LC_TIME", "C")
# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)
# Calculate report dates and weeks using ISO 8601 week numbering
report_date_obj <- as.Date(today)
current_week <- as.numeric(format(report_date_obj, "%V"))
# NOTE(review): %Y is the calendar year, while current_week uses ISO %V;
# around a year boundary these disagree (%G would be the ISO year) — confirm
# which convention downstream consumers expect.
year <- as.numeric(format(report_date_obj, "%Y"))
# Calculate dates for weekly analysis
# %w is 0=Sunday..6=Saturday, so this rolls back to the preceding Saturday.
week_start <- report_date_obj - ((as.numeric(format(report_date_obj, "%w")) + 1) %% 7)
week_end <- week_start + 6
# Calculate week days (copied from 05 script for compatibility)
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Calculate initial week number
# NOTE(review): lubridate::week() counts 7-day blocks from Jan 1, whereas
# current_week above is ISO-8601; the two can differ — confirm which
# convention the weekly file names / ci_plot labels rely on.
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))
# Calculate previous dates for comparisons
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
# Adjust week calculation based on mail day
# If the report date falls after the mail day within the week, shift the
# reporting window forward one week and re-anchor the comparison dates.
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
safe_log("Adjusting weeks because of mail day")
week <- lubridate::week(today) + 1
today_minus_1 <- as.character(lubridate::ymd(today))
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}
# Calculate week numbers for previous weeks
# NOTE(review): no wrap-around at week 1 — week_minus_1 can be 0 or
# negative in early January; verify callers tolerate that.
week_minus_1 <- week - 1
week_minus_2 <- week - 2
week_minus_3 <- week - 3
# Format current week with leading zeros
week <- sprintf("%02d", week)
safe_log(paste("Report week:", current_week, "Year:", year))
safe_log(paste("Week range:", week_start, "to", week_end))
```
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
# Load CI quadrant data for field-level analysis
tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
stop("Error loading CI quadrant data: ", e$message)
})
# NOTE: Overview maps skipped for this report
# Individual field sections load their own per-field mosaics directly
```
```{r compute_benchmarks_once, include=FALSE}
# Compute CI benchmarks once for the entire estate
benchmarks <- compute_ci_benchmarks(CI_quadrant, project_dir, c(10, 50, 90))
if (!is.null(benchmarks)) {
safe_log("Benchmarks computed successfully for the report")
} else {
safe_log("Failed to compute benchmarks", "WARNING")
}
```
## Report Summary
**Farm Location:** `r toupper(project_dir)` Estate
**Report Period:** Week `r current_week` of `r year`
**Data Source:** Planet Labs Satellite Imagery
**Analysis Type:** Chlorophyll Index (CI) Monitoring
**Report Generated on:** `r format(Sys.time(), "%B %d, %Y at %H:%M")`
**NOTE: THIS IS A NO-TABLES VERSION FOR DIAGNOSTIC PURPOSES - MAPS AND GRAPHS ONLY**
\newpage
# Section 2: Field-by-Field Analysis
## Overview of Field-Level Insights
This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions.
**Key Elements per Field:**
- Current and historical CI maps
- Week-over-week change visualizations
- Cumulative growth trends
- Field-specific KPI summaries
*Navigate to the following pages for individual field reports.*
\newpage
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
# Load field boundaries from parameters
tryCatch({
AllPivots0 <- field_boundaries_sf %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
safe_log("Successfully loaded field boundaries")
# Prepare merged field list for use in summaries
AllPivots_merged <- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
}, error = function(e) {
stop("Error loading field boundaries: ", e$message)
})
```
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, dpi=300, dev='png', message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
# Generate detailed visualizations for each field
tryCatch({
# Merge field polygons for processing and filter out NA field names
AllPivots_merged <- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA fields
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
# Use per-field weekly mosaic directory path from parameters_project.R
weekly_mosaic_per_field_dir <- weekly_CI_mosaic
# Return the ISO-8601 week number and week-based year for a Date,
# as a two-element list(week=, year=).
get_week_year <- function(date) {
  iso_week <- as.numeric(format(date, "%V"))
  iso_year <- as.numeric(format(date, "%G"))
  list(week = iso_week, year = iso_year)
}
# Get week/year for current and historical weeks (local to field section)
current_ww <- get_week_year(as.Date(today))
minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1))
minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2))
minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3))
# Debug: check how many fields we're iterating
safe_log(paste("Starting visualization loop for", nrow(AllPivots_merged), "fields"), "DEBUG")
safe_log(paste("Fields to process:", paste(AllPivots_merged$field, collapse=", ")), "DEBUG")
# Generate plots for each field
for(i in seq_along(AllPivots_merged$field)) {
field_name <- AllPivots_merged$field[i]
safe_log(paste("Processing field", i, "of", nrow(AllPivots_merged), ":", field_name), "DEBUG")
# Skip if field_name is still NA (double check)
if(is.na(field_name)) {
safe_log(paste("Skipping field", i, "- NA name"), "DEBUG")
next
}
tryCatch({
# Add page break before each field (except the first one)
if(i > 1) {
cat("\\newpage\n\n")
}
# Load per-field mosaics directly for this field
field_CI <- NULL
field_CI_m1 <- NULL
field_CI_m2 <- NULL
field_CI_m3 <- NULL
tryCatch({
# Load per-field mosaic for current week
per_field_path_current <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year
)
safe_log(paste("Looking for mosaic at:", per_field_path_current, "exists?", file.exists(per_field_path_current %||% "")), "DEBUG")
if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) {
field_CI <- terra::rast(per_field_path_current)[["CI"]]
safe_log(paste("Successfully loaded field_CI for", field_name), "DEBUG")
} else {
safe_log(paste("Could not load field_CI for", field_name, "- file not found"), "DEBUG")
}
# Load per-field mosaic for week-1
per_field_path_m1 <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year
)
if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) {
field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]]
}
# Load per-field mosaic for week-2
per_field_path_m2 <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year
)
if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) {
field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]]
}
# Load per-field mosaic for week-3
per_field_path_m3 <- get_per_field_mosaic_path(
weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year
)
if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) {
field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]]
}
safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG")
}, error = function(e) {
safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING")
})
# Calculate difference rasters from per-field data (local to this field)
last_week_dif_raster_field <- NULL
three_week_dif_raster_field <- NULL
if (!is.null(field_CI) && !is.null(field_CI_m1)) {
last_week_dif_raster_field <- field_CI - field_CI_m1
}
if (!is.null(field_CI) && !is.null(field_CI_m3)) {
three_week_dif_raster_field <- field_CI - field_CI_m3
}
# Call ci_plot with field-specific rasters
ci_plot(
pivotName = field_name,
field_boundaries = AllPivots0,
current_ci = field_CI,
ci_minus_1 = field_CI_m1,
ci_minus_2 = field_CI_m2,
last_week_diff = last_week_dif_raster_field,
three_week_diff = three_week_dif_raster_field,
harvesting_data = harvesting_data,
week = week,
week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3,
borders = borders,
colorblind_friendly = colorblind_friendly
)
cat("\n\n")
# Special handling for ESA project field 00f25 - remove duplicate DOY values
if (project_dir == "esa" && field_name == "00F25") {
ci_quadrant_data <- CI_quadrant %>%
filter(field == "00F25") %>%
arrange(DOY) %>%
group_by(DOY) %>%
slice(1) %>%
ungroup()
} else {
ci_quadrant_data <- CI_quadrant
}
# Call cum_ci_plot with explicit parameters
cum_ci_plot(
pivotName = field_name,
ci_quadrant_data = ci_quadrant_data,
plot_type = ci_plot_type,
facet_on = facet_by_season,
x_unit = x_axis_unit,
colorblind_friendly = colorblind_friendly,
show_benchmarks = TRUE,
estate_name = project_dir,
benchmark_percentiles = c(10, 50, 90),
benchmark_data = benchmarks
)
cat("\n\n")
# Add field-specific KPI summary under the graphs
if (exists("field_details_table") && !is.null(field_details_table)) {
kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant)
cat(kpi_summary)
cat("\n\n")
}
}, error = function(e) {
safe_log(paste("Error generating plots for field", field_name, ":", e$message), "ERROR")
cat("\\newpage\n\n")
cat("# Error generating plots for field ", field_name, "\n\n")
cat(e$message, "\n\n")
})
}
}, error = function(e) {
safe_log(paste("Error in field visualization section:", e$message), "ERROR")
cat("Error generating field plots. See log for details.\n\n")
})
```
\newpage
# END OF NO-TABLES DIAGNOSTIC REPORT
This diagnostic report contains only maps and graphs to help identify if the visualization system is working correctly.
*Generated for diagnostic purposes*

View file

@ -2,7 +2,7 @@
params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2026-01-25"
report_date: "2025-09-30"
data_dir: "angata"
mail_day: "Wednesday"
borders: FALSE
@ -38,20 +38,25 @@ reporting_script <- TRUE
# Load all packages at once with suppressPackageStartupMessages
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(tidyverse)
library(tmap)
library(lubridate)
library(zoo)
library(rsample)
library(caret)
library(randomForest)
library(CAST)
library(knitr)
library(tidyr)
library(flextable)
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Spatial data handling
library(sf) # For reading/manipulating field boundaries (GeoJSON)
library(terra) # For raster operations (reading mosaic TIFFs for visualization)
# Data manipulation
library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization)
library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion)
library(lubridate) # For date/time operations (week extraction, date formatting)
library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling)
# Visualization
library(tmap) # For interactive maps (field boundary visualization)
# Reporting
library(knitr) # For R Markdown document generation (code execution and output)
library(flextable) # For formatted tables in Word output (professional table styling)
})
# Load custom utility functions
@ -105,6 +110,16 @@ tryCatch({
stop("Error loading parameters_project.R: ", e$message)
})
# Source common utilities for logging and helper functions
tryCatch({
source(here::here("r_app", "00_common_utils.R"))
}, error = function(e) {
stop("Error loading 00_common_utils.R: ", e$message)
})
# Load centralized paths
paths <- setup_project_directories(project_dir)
# Log initial configuration
safe_log("Starting the R Markdown script with KPIs")
safe_log(paste("mail_day params:", params$mail_day))
@ -112,16 +127,23 @@ safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r load_kpi_data, eval=TRUE, message=FALSE, warning=FALSE, include=FALSE}
```{r load_kpi_data, message=FALSE, warning=FALSE}
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
# First, show working directory for debugging
cat("\n=== DEBUG: R Markdown Working Directory ===\n")
cat(paste("getwd():", getwd(), "\n"))
cat(paste("Expected knit_dir from R Markdown:", knitr::opts_knit$get("root.dir"), "\n\n"))
# Primary expected directory from centralized paths
kpi_data_dir <- paths$kpi_reports_dir
date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Calculate current week from report_date using ISO 8601 week numbering
current_week <- as.numeric(format(as.Date(report_date), "%V"))
current_iso_year <- as.numeric(format(as.Date(report_date), "%G"))
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year)
current_year <- as.numeric(format(as.Date(report_date), "%G"))
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
@ -171,30 +193,69 @@ if (is.null(summary_file) || is.null(field_details_file)) {
# Final checks and load with safe error messages
kpi_files_exist <- FALSE
# Debug: log what we're looking for
cat("\n=== KPI LOADING DEBUG ===\n")
cat(paste("Working directory:", getwd(), "\n"))
cat(paste("project_dir:", project_dir, "\n"))
cat(paste("report_date:", report_date, "\n"))
cat(paste("Calculated week:", current_week, "year:", current_year, "\n"))
cat(paste("Looking for KPI files in:", kpi_data_dir, "\n"))
cat(paste("Directory exists:", dir.exists(kpi_data_dir), "\n"))
cat(paste("Expected filenames to match:\n"))
for (name in expected_summary_names) cat(paste(" -", name, "\n"))
# List what's actually in the directory
if (dir.exists(kpi_data_dir)) {
actual_files <- list.files(kpi_data_dir, pattern = ".*\\.rds$", full.names = FALSE)
cat(paste("Files in KPI directory (", length(actual_files), " total):\n"))
for (f in actual_files) cat(paste(" -", f, "\n"))
} else {
cat("KPI directory does NOT exist!\n")
}
if (!is.null(summary_file) && file.exists(summary_file)) {
safe_log(paste("Loading KPI summary from:", summary_file))
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
cat(paste("✓ FOUND summary file:", summary_file, "\n"))
cat(paste(" File size:", file.size(summary_file), "bytes\n"))
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
if (!is.null(summary_data)) {
cat(paste(" ✓ Loaded successfully. Class:", class(summary_data), "\n"))
if (is.list(summary_data)) {
cat(paste(" List names:", paste(names(summary_data), collapse = ", "), "\n"))
}
# Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format
if (!is.null(summary_data)) {
if (is.list(summary_data) && !is.data.frame(summary_data)) {
# New format from 09_field_analysis_weekly.R - just pass it through
if ("field_analysis_summary" %in% names(summary_data)) {
cat(" ✓ Found field_analysis_summary in list - will use this structure\n")
# Keep the new structure intact - combined_kpi_table will use it directly
kpi_files_exist <- TRUE
} else {
cat(" ! Old format detected\n")
# Old format - keep as is
summary_tables <- summary_data
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
}
} else {
cat(" ! Data frame format\n")
# Data frame format or direct tables
summary_tables <- summary_data
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
}
} else {
cat(" ✗ Failed to load RDS - summary_data is NULL\n")
}
} else {
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
safe_log(paste("Attempted directory:", kpi_data_dir), "WARNING")
# Try searching the entire workspace as fallback
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "kpi.*\\.rds$", recursive = TRUE, full.names = TRUE)
safe_log(paste("Found", length(files), "KPI RDS files in workspace"), "INFO")
if (length(files) > 0) {
safe_log(paste("Available files:", paste(basename(files), collapse = ", ")), "INFO")
}
}
if (!is.null(field_details_file) && file.exists(field_details_file)) {
@ -427,7 +488,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE, eval=TRUE}
# Load CI index data with error handling
tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
@ -683,20 +744,77 @@ tryCatch({
\newpage
## 1.2 Key Performance Indicators
```{r combined_kpi_table, echo=FALSE, eval=TRUE}
```{r combined_kpi_table, echo=TRUE}
# Debug: check what variables exist
cat("\n=== DEBUG: combined_kpi_table chunk ===\n")
cat(paste("exists('summary_data'):", exists("summary_data"), "\n"))
cat(paste("exists('kpi_files_exist'):", exists("kpi_files_exist"), "\n"))
if (exists("kpi_files_exist")) {
cat(paste("kpi_files_exist value:", kpi_files_exist, "\n"))
}
if (exists("summary_data")) {
cat(paste("summary_data class:", class(summary_data), "\n"))
if (is.list(summary_data)) {
cat(paste("summary_data names:", paste(names(summary_data), collapse = ", "), "\n"))
cat(paste("has field_analysis_summary:", "field_analysis_summary" %in% names(summary_data), "\n"))
}
} else {
cat("summary_data DOES NOT EXIST in this chunk's environment!\n")
}
cat("\n")
# Create summary KPI table from field_analysis_summary data
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary" %in% names(summary_data)) {
field_analysis_summary <- summary_data$field_analysis_summary
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
# Load field analysis data
field_analysis_df <- summary_data$field_analysis
# If field_analysis_summary is NULL or doesn't exist, create it from field_analysis_df
if (is.null(summary_data$field_analysis_summary) || !("field_analysis_summary" %in% names(summary_data)) ||
!is.data.frame(summary_data$field_analysis_summary)) {
cat("\nNote: field_analysis_summary not in RDS, creating from field_analysis...\n")
# Create summary by aggregating by Status_Alert and Phase categories
# This groups fields by their phase and status to show distribution
phase_summary <- field_analysis_df %>%
filter(!is.na(Phase)) %>%
group_by(Phase) %>%
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
mutate(Category = Phase) %>%
select(Category, Acreage)
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
trigger_summary <- tryCatch({
field_analysis_df %>%
filter(!is.na(Status_Alert), Status_Alert != "") %>%
group_by(Status_Alert) %>%
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
mutate(Category = Status_Alert) %>%
select(Category, Acreage)
}, error = function(e) {
cat("Could not create trigger summary:", e$message, "\n")
data.frame(Category = character(), Acreage = numeric())
})
# Combine into summary
field_analysis_summary <- bind_rows(phase_summary, trigger_summary)
cat(paste("Created summary with", nrow(field_analysis_summary), "category rows\n"))
} else {
# Use existing summary from RDS
field_analysis_summary <- summary_data$field_analysis_summary
}
# Phase names and trigger names to extract from summary
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
"Germination Complete", "Germination Started", "No Active Trigger")
"Germination Complete", "Germination Started", "No Active Trigger",
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
# Extract phase distribution - match on category names directly
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
phase_rows <- field_analysis_summary %>%
filter(Category %in% phase_names) %>%
select(Category, Acreage) %>%
@ -802,6 +920,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary"
}
ft
} else {
cat("KPI summary data available but is empty/invalid.\n")
}
} else {
cat("KPI summary data not available.\n")
}
@ -854,11 +975,11 @@ if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
field_analysis_table <- summary_data$field_analysis
# Extract fields with status triggers (non-null)
# Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
alerts_data <- field_analysis_table %>%
filter(!is.na(Status_trigger), Status_trigger != "") %>%
select(Field_id, Status_trigger) %>%
rename(Field = Field_id, Alert = Status_trigger)
filter(!is.na(Status_Alert), Status_Alert != "") %>%
select(Field_id, Status_Alert) %>%
rename(Field = Field_id, Alert = Status_Alert)
if (nrow(alerts_data) > 0) {
# Format alert messages for display
@ -892,22 +1013,50 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
# The report renders KPI tables and field summaries from that data
```
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE, eval=TRUE}
# Load field boundaries from parameters
field_boundaries_sf <- sf::st_make_valid(field_boundaries_sf)
tryCatch({
AllPivots0 <- field_boundaries_sf %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
safe_log("Successfully loaded field boundaries")
```{r load_field_boundaries, message=FALSE, warning=FALSE, include=FALSE}
# Load field boundaries from parameters (with fallback if geometry is invalid)
field_boundaries_loaded <- FALSE
# Prepare merged field list for use in summaries
tryCatch({
# Try to load and validate the field boundaries
if (exists("field_boundaries_sf") && !is.null(field_boundaries_sf)) {
# Try to filter - this will trigger geometry validation
AllPivots0 <- field_boundaries_sf %>%
dplyr::filter(!is.na(field), !is.na(sub_field))
# If successful, also create merged field list
AllPivots_merged <- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
field_boundaries_loaded <- TRUE
safe_log("✓ Successfully loaded field boundaries")
} else {
safe_log("⚠ field_boundaries_sf not found in environment")
}
}, error = function(e) {
stop("Error loading field boundaries: ", e$message)
# If geometry is invalid, try to fix or skip
safe_log(paste("⚠ Error loading field boundaries:", e$message), "WARNING")
safe_log("Attempting to fix invalid geometries using st_make_valid()...", "WARNING")
tryCatch({
# Try to repair invalid geometries
field_boundaries_sf_fixed <<- sf::st_make_valid(field_boundaries_sf)
AllPivots0 <<- field_boundaries_sf_fixed %>%
dplyr::filter(!is.na(field), !is.na(sub_field))
AllPivots_merged <<- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
field_boundaries_loaded <<- TRUE
safe_log("✓ Fixed invalid geometries and loaded field boundaries")
}, error = function(e2) {
safe_log(paste("⚠ Could not repair geometries:", e2$message), "WARNING")
safe_log("Continuing without field boundary data", "WARNING")
})
})
```
\newpage

View file

@ -0,0 +1,631 @@
# ==============================================================================
# SMARTCANE MANUAL PIPELINE RUNNER
# ==============================================================================
#
# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS.
# Do NOT run this script directly - instead, copy individual commands and
# paste them into your PowerShell terminal.
#
# This approach allows you to:
# - Run steps one at a time and inspect outputs
# - Re-run failed steps without re-running successful ones
# - Monitor progress between steps
# - Troubleshoot issues more easily than with automated pipeline
#
# ==============================================================================
# PIPELINE SEQUENCE (IN ORDER)
# ==============================================================================
#
# 1. Python: Download Planet satellite imagery (optional - only if new data needed)
# 2. R10: Split farm TIFFs into per-field directory structure
# 3. R20: Extract Canopy Index (CI) from 4-band imagery
# 4. R30: Interpolate growth model (smooth CI time series)
# 5. R21: Convert CI data to CSV format for Python
# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx)
# 7. R40: Create weekly mosaic TIFFs
# 8. R80: Calculate KPIs (field uniformity, trends, stress)
# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply)
#
# ==============================================================================
# BEFORE YOU START
# ==============================================================================
#
# 1. Open PowerShell in the smartcane root directory:
# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\
#
# 2. Define your parameters ONCE at the top of the session:
#
# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba
# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04)
# $OFFSET = 7 # Days to look back (e.g., 7 for one week)
# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE
# $YEAR = 2026 # ISO year - auto-calculated from END_DATE
#
# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc.
#
# ==============================================================================
# COMMAND REFERENCE
# ==============================================================================
# ==============================================================================
# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL)
# ==============================================================================
#
# PURPOSE:
# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API
# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif
#
# WHEN TO RUN:
# - Only needed if you have new dates to process
# - Pipeline skips dates already in merged_tif/ or field_tiles/
# - First-time setup: download for your date range
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# DATE: YYYY-MM-DD format (e.g., 2026-02-04)
# RESOLUTION: 3 meters (default) - can also use 5, 10
# --cleanup: Delete intermediate files after download
# --clear-all: Clear all output folders before downloading
#
# COMMAND #1 - Single Date Download:
#
# cd python_app
# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup
#
# Example:
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
#
# COMMAND #2 - Batch Download (Multiple Dates):
# For date ranges, MUST use download_planet_missing_dates.py (not Script 00)
#
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
#
# Example:
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
#
# IMPORTANT DISTINCTION:
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
# Script 00 does NOT have --start/--end flags despite documentation suggestion
# Use the correct script for your use case!
#
# EXPECTED OUTPUT:
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
#
# Note: Planet API requires authentication (PLANET_API_KEY environment variable)
# Cost: ~1,500-2,000 PU per date
#
# ============================================================================
# ==============================================================================
# STEP 1: R10 - Create Per-Field TIFF Structure
# ==============================================================================
#
# PURPOSE:
# Split farm-wide GeoTIFFs into per-field directory structure.
# Transforms: merged_tif/{DATE}.tif (single file)
# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files)
# This enables clean, scalable processing in downstream scripts.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR)
# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif
# - One TIFF per field per date (1185 fields × N dates in Angata)
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
#
# COMMAND #1 - Default (All dates, current date, 7-day window):
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
#
# COMMAND #2 - Specific Date Range:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
#
# Example (one week back from 2026-02-09):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
#
# Example (two weeks back from 2026-02-09):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
#
# EXPECTED OUTPUT:
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
# Storage location: laravel_app/storage/app/angata/field_tiles/
# Script execution time: 5-10 minutes (depends on number of dates)
#
# ============================================================================
# ==============================================================================
# STEP 2: R20 - Extract Chlorophyll Index (CI)
# ==============================================================================
#
# PURPOSE:
# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs.
# Also extracts CI statistics per sub_field for daily tracking.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI)
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds
#
# EXPECTED BEHAVIOR:
# If field_tiles_CI/ or daily_vals/ missing files, Script 20 will process them
# Script 20 skips files that already exist (to avoid re-processing)
# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end
# OFFSET: Days to look back (e.g., 7 for one week window)
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
#
# EXPECTED OUTPUT:
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/
# Storage location: laravel_app/storage/app/angata/field_tiles_CI/
# Script execution time: 10-20 minutes (depends on number of dates+fields)
#
# NOTES:
# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
#
# ============================================================================
# ==============================================================================
# STEP 3: R30 - Interpolate Growth Model
# ==============================================================================
#
# PURPOSE:
# Smooth CI time series using LOESS interpolation to fill gaps.
# Creates continuous growth curves for each field across all measurement dates.
# Enables trend analysis, yield prediction, and cumulative growth metrics.
#
# INPUT:
# - Daily CI statistics from Script 20 (field_tiles_CI/ per-field RDS files)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
# - (This is the growth model output used by Script 21 and 80)
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# EXPECTED OUTPUT:
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
# Contains: Interpolated CI data for all fields (wide format)
#
# ============================================================================
# ==============================================================================
# STEP 4: R21 - Convert CI RDS to CSV (Python Format)
# ==============================================================================
#
# PURPOSE:
# Convert growth model output from R's RDS format to Python-compatible CSV.
# Transforms from wide format (fields × dates) to long format (one row per field-date pair).
# Prepares data for Python harvest detection models.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
# (Output from Script 30)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
# - Columns: field, sub_field, Date, FitData, DOY, value
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
#
# EXPECTED OUTPUT:
# File: ci_data_for_python.csv (~5-10 MB)
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
#
# ============================================================================
# ==============================================================================
# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL)
# ==============================================================================
#
# PURPOSE:
# Predict which fields are approaching harvest in the next 28 days.
# Uses neural network (Model 307) trained on historical harvest dates.
# Generates weekly probability scores for operational harvest scheduling.
#
# REQUIRES:
# - harvest.xlsx with field planting/harvest dates
# - ci_data_for_python.csv from Script 21
# - PyTorch environment (conda pytorch_gpu)
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT]
#
# Example:
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata
#
# EXPECTED OUTPUT:
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
# Rows: One per field (e.g., 1185 rows for Angata)
#
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
#
# ============================================================================
# ==============================================================================
# STEP 6: R40 - Create Weekly Mosaic TIFFs
# ==============================================================================
#
# PURPOSE:
# Aggregate daily per-field CI TIFFs into weekly mosaics.
# Handles multiple dates (full week) with maximum CI value per pixel.
# Creates 5-band output for reporting and KPI calculations.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif
# (Daily per-field CI TIFFs from Script 20)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif
# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files)
#
# PARAMETERS:
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week
# OFFSET: Days to look back (e.g., 7 for one week window)
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT]
#
# Example (one week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
#
# EXPECTED OUTPUT:
# Location: laravel_app/storage/app/angata/weekly_mosaic/
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
# Files created: #fields (e.g., 1185 for Angata)
# Storage: ~50-100 MB total for all mosaic TIFFs
# Script execution time: 5-10 minutes
#
# NOTE: Files are named with ISO week number (WW) and year (YYYY)
# Week calculation is automatic based on END_DATE
#
# ============================================================================
# ==============================================================================
# STEP 7: R80 - Calculate Key Performance Indicators (KPIs)
# ==============================================================================
#
# PURPOSE:
# Calculate per-field metrics from weekly mosaic TIFFs:
# - Field uniformity (CV - Coefficient of Variation)
# - Growth trends (4-week and 8-week)
# - Area change detection
# - TCH forecast
# - Spatial clustering (weed/stress detection)
# - Generates Excel export for dashboards and reporting
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif
# - Field boundaries (pivot.geojson)
# - Harvest data (harvest.xlsx)
# - Historical stats cache (RDS from previous weeks)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats)
# - 21 columns with field-level KPIs and alerts
#
# PARAMETERS:
# END_DATE: Report date in YYYY-MM-DD format (default: today)
# PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
# OFFSET: Days to look back for historical comparison (default: 7, for backward compatibility)
#
# COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
#
# COMMAND #2 - Specific Date & Project:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
#
# Example (2026-02-09, angata, 7-day lookback):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
#
# EXPECTED OUTPUT:
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# Rows: One per field (e.g., 1185 for Angata)
# Columns: 21 KPI columns (uniformity, trend, alerts, etc.)
# Location: laravel_app/storage/app/angata/output/
# Script execution time: 10-20 minutes
#
# EXPECTED COLUMNS:
# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac,
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
#
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
# R40 order: [END_DATE] [OFFSET] [PROJECT]
# R80 order: [END_DATE] [PROJECT] [OFFSET]
# These are NOT the same! Ensure correct order for each script.
#
# ============================================================================
# ==============================================================================
# STEP 8: R90/R91 - Generate Word Report (OPTIONAL)
# ==============================================================================
#
# PURPOSE:
# Generate formatted Word report (.docx) with:
# - KPI summary tables and charts
# - Per-field performance metrics
# - Alerts and recommendations
# - Interpretation guides
#
# Client-Specific Reports:
# - R90: Agronomic Support (for AURA project)
# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA)
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# (from Script 80)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_*.docx
# - Formatted Word document (~5-10 MB)
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04)
# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render)
#
# COMMAND #1 - AGRONOMIC REPORT (AURA project):
# From R console or R script:
#
# rmarkdown::render(
# "r_app/90_CI_report_with_kpis_simple.Rmd",
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
# output_dir = "laravel_app/storage/app/angata/reports"
# )
#
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
# From R console or R script:
#
# rmarkdown::render(
# "r_app/91_CI_report_with_kpis_Angata.Rmd",
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
# output_dir = "laravel_app/storage/app/angata/reports"
# )
#
# EXPECTED OUTPUT:
# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
# Location: laravel_app/storage/app/{PROJECT}/reports/
# Script execution time: 5-10 minutes
#
# NOTE:
# These are R Markdown files and cannot be run directly via Rscript
# Use rmarkdown::render() from an R interactive session or wrapper script
# See run_full_pipeline.R for an automated example
#
# ============================================================================
# ==============================================================================
# QUICK REFERENCE: Common Workflows
# ==============================================================================
#
# WORKFLOW A: Weekly Update (Most Common)
# ─────────────────────────────────────────────────────────────────────────
# Goal: Process latest week of data through full pipeline
#
# Parameters:
# $PROJECT = "angata"
# $END_DATE = "2026-02-04" # Today or latest date available
# $OFFSET = 7 # One week back
#
# Steps:
# 1. SKIP Python download (if you already have data)
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7
# (Argument order: [PROJECT] [END_DATE] [OFFSET])
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
# (Argument order: [END_DATE] [OFFSET] [PROJECT])
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7
# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!)
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
# OR from R console:
# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd",
# params=list(data_dir="angata", report_date=as.Date("2026-02-04")),
# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx",
# output_dir="laravel_app/storage/app/angata/reports")
#
# Execution time: ~60-90 minutes total
#
#
# WORKFLOW B: Initial Setup (Large Backfill)
# ─────────────────────────────────────────────────────────────────────────
# Goal: Process multiple weeks of historical data
#
# Steps:
# 1. Python download (your entire date range)
# 2. Run R10 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
# 3. Run R20 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
# 4. Run R30 once (growth model full season)
# 5. Run R21 once (CSV export)
# 6. Run R40 with specific week windows as needed
#    (different END_DATE values - one per week)
# 7. Run R80 for each week you want KPIs for
#    (different WEEK/YEAR values - one per week)
# 8. Optional: run R91 for each week you want a report for
#
# Pro tip: Script R40 with offset=14 covers two weeks at once
# Then R40 again with offset=7 for just one week
#
#
# WORKFLOW C: Troubleshooting (Check Intermediate Outputs)
# ─────────────────────────────────────────────────────────────────────────
# Goal: Verify outputs before moving to next step
#
# After R10: Check field_tiles/{FIELD_ID}/ has #dates files
# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files
# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds
# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week
# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx
#
# ============================================================================
# ==============================================================================
# TROUBLESHOOTING
# ==============================================================================
#
# ISSUE: R20 not processing all field_tiles files
# ────────────────────────────────────────────────
# Symptom: field_tiles has 496 files, field_tiles_CI only has 5
#
# Possible causes:
# 1. Source files incomplete or corrupted
# 2. Script 20 skips because CI TIFF already exists (even if incomplete)
# 3. Partial run from previous attempt
#
# Solutions:
# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!)
# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/*
# 2. Re-run Script 20
# 3. If still fails, delete field_tiles_CI completely and re-run Script 20
# rm -r laravel_app/storage/app/angata/field_tiles_CI/
#
# ISSUE: Script 80 says "No per-field mosaic files found"
# ────────────────────────────────────────────────────────
# Symptom: R80 fails to calculate KPIs
#
# Possible causes:
# 1. Script 40 hasn't run yet (weekly_mosaic doesn't exist)
# 2. Wrong END_DATE or WEEK/YEAR combination
# 3. weekly_mosaic/{FIELD}/ directory missing (old format?)
#
# Solutions:
# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif
# 2. Verify END_DATE is within date range of available CI data
# 3. For current week: End date must be THIS week (same ISO week as today)
#
# ISSUE: Python download fails ("Not authorized")
# ────────────────────────────────────────────────
# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error
#
# Cause: PLANET_API_KEY environment variable not set
#
# Solution:
# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here"
# 2. Verify: $env:PLANET_API_KEY (should show your key)
# 3. Try download again
#
# ISSUE: R30 takes too long
# ──────────────────────────
# Symptom: Script 30 running for >30 minutes
#
# Cause: LOESS interpolation is slow with many dates/fields
#
# Solution:
# 1. This is normal - large date ranges slow down interpolation
# 2. Subsequent runs are faster (cached results)
# 3. If needed: reduce offset or run fewer weeks at a time
#
# ==============================================================================
# ==============================================================================
# SUMMARY OF FILES CREATED BY EACH SCRIPT
# ==============================================================================
#
# Script 10 creates:
# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif
#
# Script 20 creates:
# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds
#
# Script 30 creates:
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
#
# Script 21 creates:
# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
#
# Python 31 creates:
# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
#
# Script 40 creates:
# laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/week_{WW}_{YYYY}.tif
#
# Script 80 creates:
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds
#
# Script 90/91 creates:
# laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_{TYPE}_{PROJECT}_{DATE}.docx
#
# ==============================================================================

View file

@ -0,0 +1,54 @@
# ============================================================================
# SCRIPT XX: [DESCRIPTIVE TITLE]
# ============================================================================
# PURPOSE:
# [What this script does in 2-3 sentences. Example: Downloads satellite
# imagery from Planet API, processes 4-band RGB+NIR data, and saves
# merged GeoTIFFs for use by downstream analysis stages.]
#
# INPUT DATA:
# - Source: [Which directory/files it reads. Example: Planet API, hardcoded bbox]
# - Format: [TIFF, RDS, GeoJSON, etc. Example: 4-band uint16 GeoTIFF]
# - Location: [Full path example]
#
# OUTPUT DATA:
# - Destination: [Which directory/files it creates. Example: laravel_app/storage/app/{project}/merged_tif/]
# - Format: [TIFF, RDS, CSV, etc. Example: Single-band float32 GeoTIFF]
# - Naming convention: [Example: {YYYY-MM-DD}.tif]
#
# USAGE:
# Rscript XX_script_name.R [arg1] [arg2] [arg3]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/XX_script_name.R angata 2026-01-15
#
# PARAMETERS:
# - arg1: [Description, type, and valid values. Example: project (character) - angata, chemba, xinavane, esa, simba]
# - arg2: [Description, type, and valid values. Example: date (character, optional) - ISO format YYYY-MM-DD; default today]
#
# CLIENT TYPES:
# - cane_supply (ANGATA): [Yes/No - if Yes, briefly explain any differences]
# - agronomic_support (AURA): [Yes/No - if Yes, briefly explain any differences]
#
# DEPENDENCIES:
# - Packages: [dplyr, tidyr, terra, sf, readr, writexl, etc.]
# - Utils files: [parameters_project.R, 00_common_utils.R, XX_utils.R, etc.]
# - External data: [harvest.xlsx, pivot.geojson, etc.]
# - Data directories: [laravel_app/storage/app/{project}/]
#
# NOTES:
# [Any special considerations, assumptions, or future improvements.
# Example: Cloud filtering uses CI >= 0.5 threshold. Multi-field support
# implemented via field geometry masking from pivot.geojson.]
#
# RELATED ISSUES:
# SC-XXX: [Brief description of related work]
#
# HISTORY:
# 2026-02-03: [Description of change, if refactored or enhanced]
# ============================================================================
# NOTE: This is a TEMPLATE file for documentation purposes only.
# When creating a new script or updating an existing one, copy this template
# and fill in all sections with accurate information about your specific script.
# Then delete this comment block.

View file

@ -156,9 +156,10 @@ calculate_ci <- function(raster_obj) {
red_band <- raster_obj[[3]]
nir_band <- raster_obj[[4]]
# CI formula: (NIR / Red) - 1
# This highlights chlorophyll content in vegetation
ci_raster <- (nir_band / red_band) - 1
# CI formula: (NIR / Green) - 1, NOT (NIR / Red) - 1
# *** CRITICAL: Use GREEN band for Chlorophyll Index, NOT RED ***
# GREEN band is essential for proper chlorophyll-sensitive calculation
ci_raster <- (nir_band / green_band) - 1
# Filter extreme values that may result from division operations
ci_raster[ci_raster > 10] <- 10 # Cap max value

View file

@ -68,8 +68,9 @@ calculate_enhanced_indices <- function(red, green, blue, nir) {
grvi <- green / red
names(grvi) <- "GRVI"
# 6. Chlorophyll Index (current CI - for comparison)
ci <- nir / red - 1
# 6. Chlorophyll Index (CI = NIR / Green - 1, NOT NIR/Red)
# *** CRITICAL: Correct formula uses GREEN band, not RED ***
ci <- nir / green - 1
names(ci) <- "CI"
return(list(

View file

@ -69,26 +69,55 @@ calculate_week_numbers <- function(report_date = Sys.Date()) {
#' @param year Year
#' @param mosaic_dir Directory containing weekly mosaics
#' @return Terra raster with CI band, or NULL if file not found
# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures)
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
  # Per-field mode is signalled via attributes attached by the mosaic loader.
  per_field_flag <- attr(ci_raster_or_obj, "is_per_field")
  if (is.null(per_field_flag) || !per_field_flag) {
    # Single-file architecture: crop the already-loaded raster to the field
    # boundary when one is supplied, otherwise hand back the raster unchanged.
    if (is.null(field_vect)) {
      return(ci_raster_or_obj)
    }
    return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
  }

  # Per-field architecture: read this specific field's weekly mosaic from disk.
  mosaic_path <- file.path(
    attr(ci_raster_or_obj, "per_field_dir"),
    field_name,
    attr(ci_raster_or_obj, "week_file")
  )
  if (!file.exists(mosaic_path)) {
    safe_log(paste("Per-field mosaic not found for", field_name), "WARNING")
    return(NULL)
  }
  tryCatch({
    field_mosaic <- terra::rast(mosaic_path)
    # Multi-band mosaics carry CI as band 5 (after R, G, B, NIR); a
    # single-band file is assumed to already be the CI layer.
    if (terra::nlyr(field_mosaic) >= 5) {
      field_mosaic[[5]]
    } else {
      field_mosaic[[1]]
    }
  }, error = function(e) {
    safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING")
    NULL
  })
}
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
week_path <- file.path(mosaic_dir, week_file)
if (!file.exists(week_path)) {
safe_log(paste("Weekly mosaic not found:", week_path), "WARNING")
return(NULL)
}
# FIRST: Try to load single-file mosaic (legacy approach)
if (file.exists(week_path)) {
tryCatch({
mosaic_raster <- terra::rast(week_path)
# Extract CI band by name or position
if ("CI" %in% names(mosaic_raster)) {
ci_raster <- mosaic_raster[["CI"]]
} else {
# Fallback: assume last band is CI (after Red, Green, Blue, NIR)
ci_raster <- mosaic_raster[[nlyr(mosaic_raster)]]
}
ci_raster <- mosaic_raster[[5]] # CI is the 5th band
names(ci_raster) <- "CI"
safe_log(paste("Loaded weekly mosaic:", week_file))
safe_log(paste("Loaded weekly mosaic (single-file):", week_file))
return(ci_raster)
}, error = function(e) {
safe_log(paste("Error loading mosaic:", e$message), "ERROR")
@ -96,6 +125,40 @@ load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
})
}
# SECOND: Per-field architecture - store mosaic_dir path for later per-field loading
# Don't try to merge - just return the directory path so field-level functions can load per-field
if (dir.exists(mosaic_dir)) {
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
# Check if any field has this week's mosaic
found_any <- FALSE
for (field in field_dirs) {
field_mosaic_path <- file.path(mosaic_dir, field, week_file)
if (file.exists(field_mosaic_path)) {
found_any <- TRUE
break
}
}
if (found_any) {
safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year,
"- will load per-field on demand"))
# Return a special object that indicates per-field loading is needed
# Store the mosaic_dir path in the raster's metadata
dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA)
attr(dummy_raster, "per_field_dir") <- mosaic_dir
attr(dummy_raster, "week_file") <- week_file
attr(dummy_raster, "is_per_field") <- TRUE
return(dummy_raster)
}
}
# If we get here, no mosaic found
safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING")
return(NULL)
}
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
@ -141,12 +204,16 @@ calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) {
# Extract field boundary
field_vect <- field_boundaries_vect[i]
# crop ci_raster with field_vect and use that for ci_values
cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE)
# Load appropriate CI raster using helper function
cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect)
# Extract CI values for this field using helper function
if (!is.null(cropped_raster)) {
field_values <- extract_ci_values(cropped_raster, field_vect)
valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
} else {
valid_values <- c()
}
# If all valid values are 0 (cloud), fill with NA row
if (length(valid_values) == 0 || all(valid_values == 0)) {
@ -286,9 +353,18 @@ calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries)
# Extract field boundary
field_vect <- field_boundaries_vect[i]
# Extract CI values for both weeks (using helper to get CI band only)
current_values <- extract_ci_values(current_ci, field_vect)
previous_values <- extract_ci_values(previous_ci, field_vect)
# Load appropriate CI rasters using helper function
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
# Clean values
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
@ -569,8 +645,18 @@ calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundari
sub_field_name <- field_boundaries$sub_field[i]
field_vect <- field_boundaries_vect[i]
# Extract CI values for both weeks (using helper to get CI band only)
current_values <- extract_ci_values(current_ci, field_vect)
# Load appropriate CI rasters using helper function
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
previous_values <- extract_ci_values(previous_ci, field_vect)
# Clean values
@ -728,8 +814,17 @@ calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundarie
}
# Extract CI values for both weeks (using helper to get CI band only)
current_values <- extract_ci_values(current_ci, field_vect)
previous_values <- extract_ci_values(previous_ci, field_vect)
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
# Clean values
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
@ -810,8 +905,15 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
sub_field_name <- field_boundaries$sub_field[i]
field_vect <- field_boundaries_vect[i]
# Load appropriate CI raster using helper function
field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect)
# Extract CI values using helper function
ci_values <- extract_ci_values(ci_raster, field_vect)
if (!is.null(field_ci)) {
ci_values <- extract_ci_values(field_ci, field_vect)
} else {
ci_values <- c()
}
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
if (length(valid_values) > 1) {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -4,24 +4,6 @@
# These functions support the creation of maps, charts and report elements
# for the CI_report_dashboard_planet.Rmd document.
#' Safe logging function that works whether log_message exists or not
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  # Prefer the project-wide logger whenever it has been defined upstream.
  if (exists("log_message")) {
    return(log_message(message, level))
  }
  # Fallback to base R: ERROR/WARNING escalate to warning(); everything
  # else is emitted as an informational message.
  switch(level,
    ERROR = warning(message),
    WARNING = warning(message),
    message(message)
  )
}
#' Creates a sub-chunk for use within RMarkdown documents
#'
#' @param g A ggplot object to render in the sub-chunk
@ -244,14 +226,14 @@ ci_plot <- function(pivotName,
# Filter for the specific pivot
AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName)
# Create crop masks for different timepoints using terra functions
singlePivot <- terra::crop(current_ci, pivotShape) %>% terra::mask(., pivotShape)
singlePivot_m1 <- terra::crop(ci_minus_1, pivotShape) %>% terra::mask(., pivotShape)
singlePivot_m2 <- terra::crop(ci_minus_2, pivotShape) %>% terra::mask(., pivotShape)
# Per-field mosaics are already cropped to field boundaries, so use directly without cropping
singlePivot <- current_ci
singlePivot_m1 <- ci_minus_1
singlePivot_m2 <- ci_minus_2
# Create difference maps
abs_CI_last_week <- terra::crop(last_week_diff, pivotShape) %>% terra::mask(., pivotShape)
abs_CI_three_week <- terra::crop(three_week_diff, pivotShape) %>% terra::mask(., pivotShape)
# Use difference maps directly (already field-specific)
abs_CI_last_week <- last_week_diff
abs_CI_three_week <- three_week_diff
# Get planting date
planting_date <- harvesting_data %>%
@ -822,3 +804,144 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
})
}
#' Aggregate per-field weekly mosaics into a farm-level mosaic
#'
#' Reads all per-field mosaic TIFs for a given week and merges them into a single farm-level mosaic.
#' This is used for overview maps in the report (Script 90).
#'
#' Per-field mosaics already have proper geospatial metadata (CRS, geotransform) from Script 10,
#' so terra can align them automatically without needing field boundaries or extent information.
#'
#' @param weekly_mosaic_dir Path to weekly_mosaic directory (e.g., "laravel_app/storage/app/{project}/weekly_mosaic")
#' @param target_week ISO week number (e.g., 52)
#' @param target_year ISO year (e.g., 2025)
#' @return SpatRaster object (5-band: R,G,B,NIR,CI) or NULL if no fields found
#'
#' @details
#' Per-field mosaics are located at: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
#' This function:
#' 1. Finds all per-field subdirectories
#' 2. Loads each field's weekly mosaic
#' 3. Merges to a single raster using terra::mosaic() (alignment handled automatically by metadata)
#' 4. Returns combined 5-band raster for visualization
#'
aggregate_per_field_mosaics_to_farm_level <- function(
weekly_mosaic_dir,
target_week,
target_year
) {
# Outer tryCatch: any unexpected failure is logged and converted to NULL,
# so callers (Script 90 overview maps) can degrade gracefully.
tryCatch({
# Validate directory exists
if (!dir.exists(weekly_mosaic_dir)) {
safe_log(paste("Weekly mosaic directory not found:", weekly_mosaic_dir), "WARNING")
return(NULL)
}
# Find all per-field subdirectories (non-TIF files at top level)
all_items <- list.files(weekly_mosaic_dir, full.names = FALSE)
# Keep only entries that are directories and do not look like TIFs;
# each surviving entry is assumed to be one field's mosaic folder.
field_dirs <- all_items[
!grepl("\\.tif$", all_items, ignore.case = TRUE) &
dir.exists(file.path(weekly_mosaic_dir, all_items))
]
if (length(field_dirs) == 0) {
safe_log(paste("No per-field directories found in", weekly_mosaic_dir), "WARNING")
return(NULL)
}
safe_log(paste("Found", length(field_dirs), "field directories. Aggregating week",
sprintf("%02d", target_week), target_year), "INFO")
# Collect rasters from each field
raster_list <- list()
for (field_dir in field_dirs) {
# Expected layout: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
field_mosaic_path <- file.path(
weekly_mosaic_dir,
field_dir,
paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif")
)
if (file.exists(field_mosaic_path)) {
# Per-field tryCatch: a single unreadable TIF is skipped with a
# warning instead of aborting the whole aggregation.
tryCatch({
r <- terra::rast(field_mosaic_path)
raster_list[[field_dir]] <- r
safe_log(paste("Loaded mosaic for field:", field_dir), "DEBUG")
}, error = function(e) {
safe_log(paste("Could not load mosaic for field", field_dir, ":", e$message), "WARNING")
})
}
}
if (length(raster_list) == 0) {
safe_log(paste("No field mosaics found for week", sprintf("%02d", target_week), target_year), "WARNING")
return(NULL)
}
safe_log(paste("Successfully loaded mosaics for", length(raster_list), "fields"), "INFO")
# Create a SpatRasterCollection and mosaic using correct terra syntax
tryCatch({
rsrc <- terra::sprc(raster_list)
safe_log(paste("Created SpatRasterCollection with", length(raster_list), "rasters"), "DEBUG")
# Mosaic the rasters - this merges them into a single continuous raster
# NOTE(review): overlap handling follows terra::mosaic()'s default
# summary function — presumably averaging overlapping cells; confirm
# against the terra documentation if fields can overlap.
farm_mosaic <- terra::mosaic(rsrc)
safe_log(paste("Aggregated", length(raster_list), "per-field mosaics into farm-level mosaic"), "INFO")
# Verify mosaic was created successfully
if (is.null(farm_mosaic)) {
stop("mosaic() returned NULL")
}
# return() here exits the whole function; the error handler below only
# returns from the handler, making this inner tryCatch (and hence the
# function) evaluate to NULL on mosaic failure.
return(farm_mosaic)
}, error = function(e) {
safe_log(paste("Error during mosaic creation:", e$message), "ERROR")
return(NULL)
})
}, error = function(e) {
safe_log(paste("Error aggregating per-field mosaics:", e$message), "ERROR")
return(NULL)
})
}
#' Get per-field mosaic path (new per-field architecture)
#'
#' Returns the path to a per-field weekly mosaic for direct visualization.
#' Replaces the old cropping workflow: now we load the field's own mosaic instead of cropping farm-level.
#'
#' @param weekly_mosaic_dir Path to weekly_mosaic directory
#' @param field_name Name of the field
#' @param target_week ISO week number
#' @param target_year ISO year
#' @return Path to field-specific mosaic TIF, or NULL if not found
#'
get_per_field_mosaic_path <- function(
weekly_mosaic_dir,
field_name,
target_week,
target_year
) {
  # Expected layout: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
  candidate <- file.path(
    weekly_mosaic_dir,
    field_name,
    paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif")
  )
  if (!file.exists(candidate)) {
    safe_log(paste("Per-field mosaic not found for field", field_name,
                   "week", sprintf("%02d", target_week), target_year), "WARNING")
    return(NULL)
  }
  candidate
}

View file

@ -5,11 +5,12 @@
# 1. Python: Download Planet images
# 2. R 10: Create master grid and split TIFFs
# 3. R 20: CI Extraction
# 4. R 21: Convert CI RDS to CSV
# 5. R 30: Interpolate growth model
# 4. R 30: Interpolate growth model
# 5. R 21: Convert CI RDS to CSV (uses Script 30 output)
# 6. Python 31: Harvest imminent weekly
# 7. R 40: Mosaic creation
# 8. R 80: Calculate KPIs
# 9. R 90 (Agronomic) OR R 91 (Cane Supply): Generate Word Report
#
# ==============================================================================
# HOW TO RUN THIS SCRIPT
@ -30,11 +31,12 @@
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
end_date <- as.Date("2026-02-04") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
migrate_legacy_format <- TRUE # Set to TRUE to migrate from old format (merged_tif/merged_tif_8b) to new format (field_tiles)
# *** NOTE: data_source is now unified - all projects use field_tiles after migration ***
# ***************************
# Format dates
@ -48,23 +50,35 @@ pipeline_success <- TRUE
# ==============================================================================
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")
# Check Script 10 outputs (tiled splits)
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
tiles_dates <- if (dir.exists(tiles_dir)) {
list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
} else {
c()
# Check Script 10 outputs (field_tiles with per-field TIFFs)
# Script 10 outputs to field_tiles/{field_id}/{date}.tif
field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
tiles_dates <- c()
if (dir.exists(field_tiles_dir)) {
# Get all field subdirectories
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
if (length(field_dirs) > 0) {
# Get unique dates from all field directories
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
tiles_dates <- unique(sub("\\.tif$", "", all_files))
}
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
}
cat(sprintf("Script 10: %d dates already tiled (field_tiles/)\n", length(tiles_dates)))
# Check Script 20 outputs (CI extraction) - daily RDS files
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
ci_files <- if (dir.exists(ci_daily_dir)) {
list.files(ci_daily_dir, pattern = "\\.rds$")
} else {
c()
# Check Script 20 outputs (CI extraction) - per-field CI TIFFs at field_tiles_CI/{FIELD}/{DATE}.tif
# NOTE: This is the NEW per-field format, not the old extracted_ci/ flat format
field_tiles_ci_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
ci_tiff_dates <- c()
if (dir.exists(field_tiles_ci_dir)) {
# Get all field subdirectories
field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE)
if (length(field_dirs) > 0) {
# Get unique dates from all field directories (dates that have been processed through Script 20)
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
ci_tiff_dates <- unique(sub("\\.tif$", "", all_files))
}
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
}
cat(sprintf("Script 20: %d dates already processed (field_tiles_CI/)\n", length(ci_tiff_dates)))
# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this
# Instead, check if CI RDS files exist - if they do, 21 should also run
@ -72,7 +86,7 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
# Check Script 40 outputs (mosaics in weekly_tile_max/5x5)
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
mosaic_files <- if (dir.exists(mosaic_dir)) {
list.files(mosaic_dir, pattern = "\\.tif$")
} else {
@ -90,9 +104,9 @@ kpi_files <- if (dir.exists(kpi_dir)) {
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
# Determine if scripts should run based on outputs
skip_10 <- length(tiles_dates) > 0 && !force_rerun
skip_20 <- length(ci_files) > 0 && !force_rerun
skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped
skip_10 <- FALSE # Script 10 should always run to pick up any new merged_tif files
skip_20 <- FALSE # Script 20 always runs to process dates in the current window (per-field format)
skip_21 <- FALSE # Skip 21 only if 20 is skipped
skip_40 <- length(mosaic_files) > 0 && !force_rerun
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
@ -110,25 +124,50 @@ cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
tryCatch({
# Setup paths
base_path <- file.path("laravel_app", "storage", "app", project_dir)
merged_tifs_dir <- file.path(base_path, data_source)
# Get existing dates from raw TIFFs
# Always check merged_tif/ for existing downloads (both modes)
# merged_tif/ is where Python downloads go, before Script 10 splits to field_tiles/
merged_tifs_dir <- file.path(base_path, "merged_tif")
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
# Get existing dates from tiles (better indicator of completion)
existing_tile_dates <- tiles_dates
if (migrate_legacy_format) {
cat(sprintf(" Migration mode: Checking merged_tif/ for existing dates\n"))
} else {
cat(sprintf(" Production mode: Checking merged_tif/ and field_tiles/ for existing dates\n"))
}
# Find missing dates in the window
# Window: from (end_date - offset) to end_date
# Example: if end_date=2026-02-04 and offset=7, window is 2026-01-28 to 2026-02-04 (8 dates)
start_date <- end_date - offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
# Also check field_tiles/ for dates that have already been processed through Script 10
# field_tiles/ contains {field_id}/{date}.tif files - check which dates are present
field_tiles_dir <- file.path(base_path, "field_tiles")
processed_dates <- c()
if (dir.exists(field_tiles_dir)) {
# Get all field subdirectories
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
if (length(field_dirs) > 0) {
# Get unique dates from all field directories
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
processed_dates <- unique(sub("\\.tif$", "", all_files))
}
}
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
# Combine existing dates from both merged_tif and field_tiles
all_existing_dates <- unique(c(existing_tiff_dates, processed_dates))
# Compare: which target dates don't exist in merged_tif/ or field_tiles/?
missing_dates <- target_dates[!(target_dates %in% all_existing_dates)]
cat(sprintf(" Existing dates in merged_tif/: %d\n", length(existing_tiff_dates)))
cat(sprintf(" Processed dates in field_tiles/: %d\n", length(processed_dates)))
cat(sprintf(" Target window: %s to %s (%d dates)\n", start_date, end_date, length(target_dates)))
cat(sprintf(" Missing dates to download: %d\n", length(missing_dates)))
# Download each missing date
download_count <- 0
@ -170,6 +209,51 @@ tryCatch({
pipeline_success <<- FALSE
})
# ==============================================================================
# MIGRATION: Move legacy format files to new format (if enabled)
# ==============================================================================
if (pipeline_success && migrate_legacy_format) {
cat("\n========== MIGRATION: MOVING LEGACY FORMAT FILES ==========\n")
tryCatch({
base_path <- file.path("laravel_app", "storage", "app", project_dir)
# PART 1: Move merged_tif files to field_tiles
merged_tif_old <- file.path(base_path, "merged_tif")
field_tiles_new <- file.path(base_path, "field_tiles")
if (dir.exists(merged_tif_old)) {
tif_files <- list.files(merged_tif_old, pattern = "\\.tif$", full.names = TRUE)
if (length(tif_files) > 0) {
dir.create(field_tiles_new, showWarnings = FALSE, recursive = TRUE)
for (file in tif_files) {
file.rename(file, file.path(field_tiles_new, basename(file)))
}
cat(sprintf("✓ Moved %d TIFF files from merged_tif/ to field_tiles/\n", length(tif_files)))
}
}
# PART 2: Move merged_tif_final files (CI) to field_tiles_CI
merged_tif_final_old <- file.path(base_path, "merged_tif_final")
field_tiles_ci_new <- file.path(base_path, "field_tiles_CI")
if (dir.exists(merged_tif_final_old)) {
ci_files <- list.files(merged_tif_final_old, pattern = "\\.tif$", full.names = TRUE)
if (length(ci_files) > 0) {
dir.create(field_tiles_ci_new, showWarnings = FALSE, recursive = TRUE)
for (file in ci_files) {
file.rename(file, file.path(field_tiles_ci_new, basename(file)))
}
cat(sprintf("✓ Moved %d CI TIFF files from merged_tif_final/ to field_tiles_CI/\n", length(ci_files)))
}
}
cat("✓ Migration completed successfully\n")
}, error = function(e) {
cat("✗ Error in migration:", e$message, "\n")
pipeline_success <<- FALSE
})
}
# ==============================================================================
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
# ==============================================================================
@ -178,20 +262,38 @@ if (pipeline_success && !skip_10) {
tryCatch({
# Set environment variables for the script (Script 10 uses these for filtering)
assign("PROJECT", project_dir, envir = .GlobalEnv)
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
# Count field_tiles/ dates BEFORE Script 10 runs
field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
field_dirs_before <- c()
if (dir.exists(field_tiles_dir)) {
field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
if (length(field_dirs_tmp) > 0) {
all_files_before <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
field_dirs_before <- unique(sub("\\.tif$", "", all_files_before))
}
}
# Suppress verbose per-date output, show only summary
sink(nullfile())
source("r_app/10_create_master_grid_and_split_tiffs.R")
source("r_app/10_create_per_field_tiffs.R")
sink()
# Verify output
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
if (dir.exists(tiles_dir)) {
subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs)))
} else {
cat("✓ Script 10 completed\n")
# Count field_tiles/ dates AFTER Script 10 runs
field_dirs_after <- c()
if (dir.exists(field_tiles_dir)) {
field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
if (length(field_dirs_tmp) > 0) {
all_files_after <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
field_dirs_after <- unique(sub("\\.tif$", "", all_files_after))
}
}
# Calculate newly added dates
newly_added <- length(field_dirs_after) - length(field_dirs_before)
cat(sprintf("✓ Script 10 completed - processed %d new dates (total: %d dates in field_tiles/)\n", max(0, newly_added), length(field_dirs_after)))
}, error = function(e) {
sink()
cat("✗ Error in Script 10:", e$message, "\n")
@ -211,19 +313,71 @@ if (pipeline_success && !skip_20) {
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
source("r_app/20_ci_extraction.R")
main() # Call main() to execute the script with the environment variables
# If in migration mode, find all dates that need processing
if (migrate_legacy_format) {
cat("Migration mode: Finding all dates in field_tiles/ that need CI processing...\n")
# Verify CI output was created
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
if (dir.exists(ci_daily_dir)) {
files <- list.files(ci_daily_dir, pattern = "\\.rds$")
cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files)))
} else {
cat("✓ Script 20 completed\n")
# Get all dates from field_tiles/
field_tiles_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
all_dates_in_tiles <- c()
if (dir.exists(field_tiles_dir_check)) {
field_dirs_tmp <- list.dirs(field_tiles_dir_check, full.names = TRUE, recursive = FALSE)
if (length(field_dirs_tmp) > 0) {
all_files_tmp <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
all_dates_in_tiles <- unique(sub("\\.tif$", "", all_files_tmp))
}
}
# Get dates already processed in field_tiles_CI/
field_tiles_ci_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
processed_ci_dates <- c()
if (dir.exists(field_tiles_ci_check)) {
field_dirs_ci <- list.dirs(field_tiles_ci_check, full.names = TRUE, recursive = FALSE)
if (length(field_dirs_ci) > 0) {
all_files_ci <- list.files(field_dirs_ci, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
processed_ci_dates <- unique(sub("\\.tif$", "", all_files_ci))
}
}
# Get dates already in old RDS format
old_rds_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
processed_rds_dates <- c()
if (dir.exists(old_rds_dir)) {
rds_files <- list.files(old_rds_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$")
processed_rds_dates <- unique(sub("\\.rds$", "", rds_files))
}
# Find dates missing from either location
dates_missing_ci <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_ci_dates)]
dates_missing_rds <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_rds_dates)]
dates_to_process_migration <- sort(unique(c(dates_missing_ci, dates_missing_rds)))
cat(sprintf(" All dates in field_tiles/: %d\n", length(all_dates_in_tiles)))
cat(sprintf(" Already in field_tiles_CI/: %d\n", length(processed_ci_dates)))
cat(sprintf(" Already in extracted_ci/daily_vals/: %d\n", length(processed_rds_dates)))
cat(sprintf(" Dates needing processing: %d\n", length(dates_to_process_migration)))
if (length(dates_to_process_migration) > 0) {
assign("dates_to_process", dates_to_process_migration, envir = .GlobalEnv)
cat(sprintf(" Will process: %s to %s\n", dates_to_process_migration[1], dates_to_process_migration[length(dates_to_process_migration)]))
}
}
source("r_app/20_ci_extraction_per_field.R")
main()
# Verify output
field_tiles_ci_verify <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
tiff_count <- 0
if (dir.exists(field_tiles_ci_verify)) {
field_dirs_verify <- list.dirs(field_tiles_ci_verify, full.names = TRUE, recursive = FALSE)
if (length(field_dirs_verify) > 0) {
all_files_verify <- list.files(field_dirs_verify, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
tiff_count <- length(all_files_verify)
}
}
cat(sprintf("✓ Script 20 completed - %d CI TIFFs in field_tiles_CI/\n", tiff_count))
}, error = function(e) {
cat("✗ Error in Script 20:", e$message, "\n")
pipeline_success <<- FALSE
@ -233,7 +387,35 @@ if (pipeline_success && !skip_20) {
}
# ==============================================================================
# SCRIPT 21: CONVERT CI RDS TO CSV
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success) {
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
source("r_app/30_interpolate_growth_model.R")
main() # Call main() to execute the script with the environment variables
# Verify interpolated output
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
if (dir.exists(growth_dir)) {
files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
} else {
cat("✓ Script 30 completed\n")
}
}, error = function(e) {
cat("✗ Error in Script 30:", e$message, "\n")
pipeline_success <<- FALSE
})
}
# ==============================================================================
# SCRIPT 21: CONVERT CI RDS TO CSV (uses Script 30 output)
# ==============================================================================
if (pipeline_success && !skip_21) {
cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
@ -262,35 +444,6 @@ if (pipeline_success && !skip_21) {
cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
}
# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success) {
  cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
  tryCatch({
    # Hand the run parameters to the sourced script via the global environment.
    target_env <- .GlobalEnv
    assign("end_date", end_date, envir = target_env)
    assign("offset", offset, envir = target_env)
    assign("project_dir", project_dir, envir = target_env)
    assign("data_source", data_source, envir = target_env)
    source("r_app/30_interpolate_growth_model.R")
    main() # executes against the globals assigned above
    # Verify interpolated output and summarise the file count when present.
    growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
    if (dir.exists(growth_dir)) {
      n_files <- length(list.files(growth_dir, pattern = "\\.rds$|\\.csv$"))
      cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", n_files))
    } else {
      cat("✓ Script 30 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 30:", e$message, "\n")
    pipeline_success <<- FALSE
  })
}
# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
@ -335,9 +488,8 @@ if (pipeline_success && !skip_40) {
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
source("r_app/40_mosaic_creation.R")
source("r_app/40_mosaic_creation_per_field.R")
main() # Call main() to execute the script with the environment variables
# Verify mosaic output
@ -368,7 +520,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
assign("end_date_str", end_date_str, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
source("r_app/80_calculate_kpis.R")
main() # Call main() to execute the script with the environment variables
@ -389,6 +540,92 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
})
}
# ==============================================================================
# SCRIPT 90/91: GENERATE WORD REPORTS (CLIENT-TYPE SPECIFIC)
# ==============================================================================
if (pipeline_success) {
  # Determine client type from project mapping
  source("r_app/parameters_project.R")
  source("r_app/00_common_utils.R")
  client_type <- get_client_type(project_dir)

  # Shared renderer for the client-specific Word reports (Scripts 90/91).
  # The two branches previously duplicated ~35 lines of tryCatch/render
  # code verbatim; only the script number, banner, Rmd file and filename
  # tag differ, so one parameterised helper covers both.
  #
  # script_no  - report script number (90 or 91), used in log messages
  # banner     - section banner text, e.g. "AGRONOMIC SUPPORT REPORT (WORD)"
  # rmd_file   - path to the R Markdown file to render
  # report_tag - tag embedded in the output filename ("agronomic"/"cane_supply")
  render_client_report <- function(script_no, banner, rmd_file, report_tag) {
    cat(sprintf("\n========== RUNNING SCRIPT %d: %s ==========\n", script_no, banner))
    tryCatch({
      report_name <- sprintf("SmartCane_Report_%s_%s_%s.docx", report_tag, project_dir, end_date_str)
      reports_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports")
      # Render the R Markdown file with parameters.
      # The Rmd file will load parameters_project and utilities internally.
      rmarkdown::render(
        rmd_file,
        params = list(
          data_dir = project_dir,
          report_date = end_date,
          mail_day = "Monday",
          borders = TRUE,
          ci_plot_type = "both",
          colorblind_friendly = FALSE,
          facet_by_season = FALSE,
          x_axis_unit = "days"
        ),
        output_file = report_name,
        output_dir = reports_dir,
        quiet = FALSE,
        knit_root_dir = getwd()
      )
      # Verify the report actually landed on disk.
      report_file <- file.path(reports_dir, report_name)
      if (file.exists(report_file)) {
        cat(sprintf("✓ Script %d completed - generated Word report: %s\n", script_no, basename(report_file)))
      } else {
        cat(sprintf("⚠ Script %d report file not found - check rendering\n", script_no))
      }
    }, error = function(e) {
      cat(sprintf("✗ Error in Script %d:", script_no), e$message, "\n")
      print(e)
      # <<- walks up to the global pipeline_success flag.
      pipeline_success <<- FALSE
    })
  }

  if (client_type == "agronomic_support") {
    # SCRIPT 90: Agronomic Support Report (for Aura)
    render_client_report(90L, "AGRONOMIC SUPPORT REPORT (WORD)",
                         "r_app/90_CI_report_with_kpis_simple.Rmd", "agronomic")
  } else if (client_type == "cane_supply") {
    # SCRIPT 91: Cane Supply Report (for Angata, Chemba, Xinavane, ESA)
    render_client_report(91L, "CANE SUPPLY REPORT (WORD)",
                         "r_app/91_CI_report_with_kpis_Angata.Rmd", "cane_supply")
  } else {
    # Previously an unrecognised client type skipped reporting silently;
    # surface it so misconfigured projects are noticed.
    cat(sprintf("⚠ Unknown client type '%s' - no report script configured, skipping report\n", client_type))
  }
}
# ==============================================================================
# SUMMARY
# ==============================================================================
@ -401,4 +638,4 @@ if (pipeline_success) {
} else {
cat("Status: ✗ Pipeline failed - check errors above\n")
}
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 30 → R 21 → Python 31 → R 40 → R 80 → R 90/91\n")