diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 8e702b7..f61a2ec 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -319,4 +319,4 @@ After each major stage, verify: --- -_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`. For related Linear issues (code quality, architecture docs), see SC-59, SC-60, SC-61._ +_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`. diff --git a/checkout b/checkout new file mode 100644 index 0000000..e69de29 diff --git a/python_app/00_download_8band_pu_optimized.py b/python_app/00_download_8band_pu_optimized.py index 8c0991b..1059ae9 100644 --- a/python_app/00_download_8band_pu_optimized.py +++ b/python_app/00_download_8band_pu_optimized.py @@ -22,9 +22,9 @@ Options: --resolution RES Resolution in meters (default: 3) --skip-merge Skip merge step (download only, keep individual tiles) --cleanup Delete intermediate single_images folder after merge - --clear-singles Clear single_images_8b folder before download - --clear-merged Clear merged_tif_8b and merged_virtual_8b folders before download - --clear-all Clear all output folders (singles, merged, virtual) before download + --clear-singles Clear single_images folder before download + --clear-merged Clear merged_tif folder before download + --clear-all Clear all output folders (singles, merged) before download Examples: python download_8band_pu_optimized.py xinavane --clear-singles --cleanup @@ -89,20 +89,118 @@ def setup_config(): config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44') config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos') - # BYOC collection for Planet 8-band data - collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a' - byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True) - catalog = 
SentinelHubCatalog(config=config) - return config, byoc, catalog + return config, catalog + + +def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple: + """ + Auto-detect which Planet collection is available for this project. + + Checks a week of dates (backwards from date_str) to ensure robust detection. + If ANY date has data in the new 8-band collection, use that. + If no dates have data in new collection, fall back to legacy 4-band. + + Args: + date_str: Reference date (YYYY-MM-DD) + bbox_list: List of bounding boxes for testing + catalog: SentinelHubCatalog instance + date_range_days: Number of days to check backwards (default: 7) + + Returns: + (byoc, collection_info_dict) where byoc is DataCollection and dict contains metadata + """ + + new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a' # 8-band (new) + old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c' # 4-band (legacy) + test_bbox = bbox_list[0] + + # Generate date range (backwards from date_str) + try: + ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d') + except ValueError: + print(f"⚠️ Invalid date format: {date_str}. 
Using today.") + ref_date = datetime.datetime.now() + + date_range = [ + (ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d') + for i in range(date_range_days) + ] + + print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...") + print(f" Test range: {date_range[-1]} to {date_range[0]}") + + # Try new collection first + print(f"\n Trying 8-band collection: {new_id}") + byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True) + + for test_date in date_range: + try: + search = catalog.search( + collection=byoc_new, + bbox=test_bbox, + time=(test_date, test_date), + filter=None + ) + tiles = list(search) + if len(tiles) > 0: + print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)") + print(f" ✓ Using 8-band collection") + return byoc_new, { + 'collection_id': new_id, + 'name': 'planet_data_8b', + 'bands': 4, + 'output_folder': 'merged_tif', + 'singles_folder': 'single_images' + } + except Exception as e: + print(f" ⚠️ {test_date}: {str(e)[:60]}") + + # No data in new collection, try legacy + print(f"\n ✗ No data found in 8-band collection") + print(f" Trying legacy 4-band collection: {old_id}") + byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True) + + for test_date in date_range: + try: + search = catalog.search( + collection=byoc_old, + bbox=test_bbox, + time=(test_date, test_date), + filter=None + ) + tiles = list(search) + if len(tiles) > 0: + print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)") + print(f" ✓ Using legacy 4-band collection") + return byoc_old, { + 'collection_id': old_id, + 'name': 'planet_data', + 'bands': 4, + 'output_folder': 'merged_tif', + 'singles_folder': 'single_images' + } + except Exception as e: + print(f" ⚠️ {test_date}: {str(e)[:60]}") + + # Neither collection has data + print(f"\n ⚠️ No data found in either collection for {date_range_days} days") + print(f" Defaulting to 8-band collection (will attempt download anyway)") + return 
byoc_new, { + 'collection_id': new_id, + 'name': 'planet_data_8b', + 'bands': 4, + 'output_folder': 'merged_tif', + 'singles_folder': 'single_images' + } # ============================================================================ # EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output # ============================================================================ -EVALSCRIPT_4BAND_MASKED = """ +EVALSCRIPT_8BAND = """ //VERSION=3 function setup() { return { @@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """ } function evaluatePixel(sample) { // Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0) - // This reduces output pixels and avoids NaN interpolation on client side if (sample.udm1 == 0) { - // Scale reflectance: DN → [0, 1] range + var scaledRed = 2.5 * sample.red / 10000; + var scaledGreen = 2.5 * sample.green / 10000; + var scaledBlue = 2.5 * sample.blue / 10000; + var scaledNIR = 2.5 * sample.nir / 10000; + return [scaledRed, scaledGreen, scaledBlue, scaledNIR]; + } else { + return [NaN, NaN, NaN, NaN]; + } + } +""" + +EVALSCRIPT_4BAND_LEGACY = """ + //VERSION=3 + function setup() { + return { + input: [{ + bands: ["red", "green", "blue", "nir", "udm1"], + units: "DN" + }], + output: { + bands: 4 + } + }; + } + function evaluatePixel(sample) { + // Cloud masking for legacy collection (same band names as new 8-band) + // udm1 = 0 means clear, non-zero means cloud/shadow/etc + if (sample.udm1 == 0) { var scaledRed = 2.5 * sample.red / 10000; var scaledGreen = 2.5 * sample.green / 10000; var scaledBlue = 2.5 * sample.blue / 10000; @@ -289,6 +413,7 @@ def download_tile( output_dir: Path, config, byoc, + evalscript: str, resolution: int = 3 ) -> bool: """Download a single full tile (no geometry masking = lower PU) with exponential backoff.""" @@ -300,9 +425,9 @@ def download_tile( try: size = bbox_to_dimensions(bbox, resolution=resolution) - # Create download request with 4-band cloud-masked evalscript (uint16) + # Create download request 
with appropriate evalscript for collection request = SentinelHubRequest( - evalscript=EVALSCRIPT_4BAND_MASKED, + evalscript=evalscript, input_data=[ SentinelHubRequest.input_data( data_collection=byoc, @@ -350,6 +475,8 @@ def download_date( base_path: Path, config, byoc, + evalscript: str, + collection_info: dict, resolution: int = 3 ) -> int: """ @@ -357,14 +484,14 @@ def download_date( Returns number of successfully downloaded tiles. """ - output_dir = base_path / 'single_images_8b' / date_str + output_dir = base_path / collection_info['singles_folder'] / date_str output_dir.mkdir(parents=True, exist_ok=True) print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...") successful = 0 for idx, bbox in enumerate(bbox_list, 1): - if download_tile(date_str, bbox, output_dir, config, byoc, resolution): + if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution): successful += 1 percentage = (idx / len(bbox_list)) * 100 @@ -385,10 +512,10 @@ def download_date( # MERGE FUNCTION # ============================================================================ -def merge_tiles(date_str: str, base_path: Path) -> bool: +def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool: """Merge downloaded tiles into single GeoTIFF using GDAL.""" - single_images_dir = base_path / 'single_images_8b' / date_str + single_images_dir = base_path / collection_info['singles_folder'] / date_str # Find all response.tiff files file_list = [str(p) for p in single_images_dir.rglob('response.tiff')] @@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool: print(f" ✗ No tiles found to merge") return False - merged_tif_dir = base_path / 'merged_tif_8b' - merged_vrt_dir = base_path / 'merged_virtual_8b' + merged_tif_dir = base_path / collection_info['output_folder'] + merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}" merged_tif_dir.mkdir(parents=True, exist_ok=True) 
merged_vrt_dir.mkdir(parents=True, exist_ok=True) @@ -453,9 +580,9 @@ def main(): # Parse arguments parser = argparse.ArgumentParser( - description='Download Planet 8-band imagery with PU optimization' + description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)' ) - parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)') + parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)') parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today') parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)') parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)') @@ -481,7 +608,7 @@ def main(): date_str = datetime.date.today().strftime('%Y-%m-%d') print(f"{'='*70}") - print(f"Planet 8-Band Download - PU Optimized") + print(f"Planet Download - Auto-Detecting Collection (PU Optimized)") print(f"{'='*70}") print(f"Project: {args.project}") print(f"Date: {date_str}") @@ -489,7 +616,7 @@ def main(): # Setup SentinelHub print(f"\nSetting up SentinelHub...") - config, byoc, catalog = setup_config() + config, catalog = setup_config() print(f"✓ SentinelHub configured") # Load geometries @@ -504,15 +631,26 @@ def main(): print(f"\n✗ No tiles intersect field geometries. 
Exiting.")
         sys.exit(1)
 
+    # Auto-detect collection and get evalscript
+    byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)
+
+    # Get appropriate evalscript
+    evalscript = EVALSCRIPT_8BAND  # default; overridden below for the legacy collection
+    if '4e56d0cb' not in collection_info['collection_id']:
+        evalscript = EVALSCRIPT_4BAND_LEGACY
+
+    print(f"\n  Collection: {collection_info['name']}")
+    print(f"  Output folder: {collection_info['output_folder']}/")
+
     # Check date availability
-    print(f"\nChecking data availability...")
+    print(f"\nChecking data availability for {date_str}...")
     if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
         print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
         sys.exit(0)
 
     # Download tiles
     print(f"\n{'='*70}")
-    downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution)
+    downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)
 
     if downloaded == 0:
         print(f"\n✗ No tiles downloaded. 
Exiting.") @@ -522,20 +660,20 @@ def main(): if not args.skip_merge: print(f"\n{'='*70}") print(f"Merging tiles...") - if merge_tiles(date_str, base_path): + if merge_tiles(date_str, base_path, collection_info): print(f"✓ Merge complete") # Cleanup intermediate files if args.cleanup: print(f"\nCleaning up intermediate files...") import shutil - single_images_dir = base_path / 'single_images_8b' / date_str - merged_vrt_dir = base_path / 'merged_virtual_8b' + single_images_dir = base_path / collection_info['singles_folder'] / date_str + merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}" try: if single_images_dir.exists(): shutil.rmtree(single_images_dir) - print(f" ✓ Deleted {single_images_dir.name}/{date_str}") + print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}") # Clean old VRT files for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"): @@ -549,7 +687,7 @@ def main(): print(f"\n{'='*70}") print(f"✓ Done!") - print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}") + print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}") print(f"{'='*70}") diff --git a/python_app/download_planet_missing_dates.py b/python_app/download_planet_missing_dates.py index 9e15d25..8df67a9 100644 --- a/python_app/download_planet_missing_dates.py +++ b/python_app/download_planet_missing_dates.py @@ -98,9 +98,9 @@ byoc = DataCollection.define_byoc( def setup_paths(project): """Create and return folder paths.""" BASE_PATH = Path('../laravel_app/storage/app') / project - BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images_8b') - folder_for_merged_tifs = str(BASE_PATH / 'merged_tif_8b') - folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual_8b') + BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images') + folder_for_merged_tifs = str(BASE_PATH / 'merged_tif') + folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual') geojson_file = Path(BASE_PATH / 'Data' 
/ 'pivot.geojson') # Create folders if missing diff --git a/r_app/00_common_utils.R b/r_app/00_common_utils.R new file mode 100644 index 0000000..49a7b58 --- /dev/null +++ b/r_app/00_common_utils.R @@ -0,0 +1,356 @@ +# ============================================================================== +# # 00_COMMON_UTILS.R +# ============================================================================== +# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE +# # +# # PURPOSE: +# # Centralized location for foundational utilities used across multiple scripts. +# # These functions have NO project knowledge, NO client-type dependencies, +# # NO domain-specific logic. +# # +# # USAGE: +# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file: +# # +# # source(here::here("r_app", "parameters_project.R")) # Config first +# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities +# # +# # FUNCTIONS: +# # 1. safe_log() — Generic logging with [LEVEL] prefix +# # 2. smartcane_debug() — Conditional debug logging +# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages +# # 4. date_list() — Generate date sequences for processing windows +# # 5. load_field_boundaries() — Load field geometries from GeoJSON +# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects +# # +# # DATE FUNCTIONS (now in parameters_project.R): +# # - get_iso_week() — Extract ISO week number from date +# # - get_iso_year() — Extract ISO year from date +# # - get_iso_week_year() — Extract both ISO week and year as list +# # - format_week_label() — Format date as week/year label +# # - load_harvesting_data() — Load harvest schedule from Excel +# # +# ============================================================================== + +# # Source parameters first to get shared date utility functions +# source("parameters_project.R") + +# #' Safe Logging Function +# #' +# #' Generic logging with [LEVEL] prefix. 
Works standalone without any framework. +# #' Consistent with SmartCane logging standard. +# #' +# #' @param message The message to log +# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG" +# #' @return NULL (invisible, used for side effects) +# #' +# #' @examples +# #' safe_log("Processing started", "INFO") +# #' safe_log("Check input file", "WARNING") +# #' safe_log("Failed to load data", "ERROR") +# #' +# safe_log <- function(message, level = "INFO") { +# prefix <- sprintf("[%s]", level) +# cat(sprintf("%s %s\n", prefix, message)) +# } + +# #' SmartCane Debug Logging (Conditional) +# #' +# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set. +# #' Useful for development/troubleshooting without cluttering normal output. +# #' +# #' @param message The message to log +# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE) +# #' @return NULL (invisible, used for side effects) +# #' +# #' @examples +# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE +# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs +# #' +# smartcane_debug <- function(message, verbose = FALSE) { +# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") { +# return(invisible(NULL)) +# } +# safe_log(message, level = "DEBUG") +# } + +# #' SmartCane Warning Logging +# #' +# #' Logs WARN-level messages. Convenience wrapper around safe_log(). +# #' +# #' @param message The message to log +# #' @return NULL (invisible, used for side effects) +# #' +# #' @examples +# #' smartcane_warn("Check data format before proceeding") +# #' +# smartcane_warn <- function(message) { +# safe_log(message, level = "WARN") +# } + +# #' Load Field Boundaries from GeoJSON +# #' +# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson). +# #' Handles CRS validation and column standardization. 
+# #' +# #' @param data_dir Directory containing GeoJSON file +# #' @return List with elements: +# #' - field_boundaries_sf: sf (Simple Features) object +# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback) +# #' +# #' @details +# #' Automatically selects pivot_2.geojson for ESA project during CI extraction, +# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries. +# #' +# #' @examples +# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata") +# #' head(boundaries$field_boundaries_sf) +# #' +# load_field_boundaries <- function(data_dir) { +# # Choose field boundaries file based on project and script type +# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model) +# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson +# use_pivot_2 <- exists("project_dir") && project_dir == "esa" && +# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03 + +# if (use_pivot_2) { +# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson") +# } else { +# field_boundaries_path <- file.path(data_dir, "pivot.geojson") +# } + +# if (!file.exists(field_boundaries_path)) { +# stop(paste("Field boundaries file not found at path:", field_boundaries_path)) +# } + +# tryCatch({ +# # Read GeoJSON with explicit CRS handling +# field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE) + +# # Remove OBJECTID column immediately if it exists +# if ("OBJECTID" %in% names(field_boundaries_sf)) { +# field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID) +# } + +# # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.) +# # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.) 
+# # to prevent S2 geometry validation errors during downstream processing +# field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf) + +# # Validate and fix CRS if needed +# tryCatch({ +# # Simply assign WGS84 if not already set (safe approach) +# if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) { +# st_crs(field_boundaries_sf) <- 4326 +# warning("CRS was missing, assigned WGS84 (EPSG:4326)") +# } +# }, error = function(e) { +# tryCatch({ +# st_crs(field_boundaries_sf) <<- 4326 +# }, error = function(e2) { +# warning(paste("Could not set CRS:", e2$message)) +# }) +# }) + +# # Handle column names - accommodate optional sub_area column +# if ("sub_area" %in% names(field_boundaries_sf)) { +# field_boundaries_sf <- field_boundaries_sf %>% +# dplyr::select(field, sub_field, sub_area) %>% +# sf::st_set_geometry("geometry") +# } else { +# field_boundaries_sf <- field_boundaries_sf %>% +# dplyr::select(field, sub_field) %>% +# sf::st_set_geometry("geometry") +# } + +# # Convert to terra vector if possible, otherwise use sf +# field_boundaries <- tryCatch({ +# field_boundaries_terra <- terra::vect(field_boundaries_sf) +# crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL) +# crs_str <- if (!is.null(crs_value)) as.character(crs_value) else "" + +# if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) { +# terra::crs(field_boundaries_terra) <- "EPSG:4326" +# warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)") +# } +# field_boundaries_terra + +# }, error = function(e) { +# warning(paste("Terra conversion failed, using sf object instead:", e$message)) +# field_boundaries_sf +# }) + +# return(list( +# field_boundaries_sf = field_boundaries_sf, +# field_boundaries = field_boundaries +# )) +# }, error = function(e) { +# cat("[DEBUG] Error in load_field_boundaries:\n") +# cat(" Message:", e$message, "\n") +# cat(" Call:", deparse(e$call), "\n") +# stop(paste("Error loading field boundaries:", 
e$message)) +# }) +# } + + + +# #' Generate a Sequence of Dates for Processing +# #' +# #' Creates a date range from start_date to end_date and extracts week/year info. +# #' Used by Scripts 20, 30, 40 to determine data processing windows. +# #' +# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string) +# #' @param offset Number of days to look back from end_date (e.g., 7 for one week) +# #' @return A list containing: +# #' - week: ISO week number of start_date +# #' - year: ISO year of start_date +# #' - days_filter: Vector of dates in "YYYY-MM-DD" format +# #' - start_date: Start date as Date object +# #' - end_date: End date as Date object +# #' +# #' @details +# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return +# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations, +# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead. +# #' +# #' @examples +# #' dates <- date_list(as.Date("2025-01-15"), offset = 7) +# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15") +# #' +# #' dates <- date_list("2025-12-31", offset = 14) +# #' # Handles string input and returns 14 days of data +# #' +# date_list <- function(end_date, offset) { +# # Input validation +# if (!lubridate::is.Date(end_date)) { +# end_date <- as.Date(end_date) +# if (is.na(end_date)) { +# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.") +# } +# } + +# offset <- as.numeric(offset) +# if (is.na(offset) || offset < 1) { +# stop("Invalid offset provided. 
Expected a positive number.") +# } + +# # Calculate date range +# offset <- offset - 1 # Adjust offset to include end_date +# start_date <- end_date - lubridate::days(offset) + +# # Extract ISO week and year information (from END date for reporting period) +# week <- lubridate::isoweek(end_date) +# year <- lubridate::isoyear(end_date) + +# # Generate sequence of dates +# days_filter <- seq(from = start_date, to = end_date, by = "day") +# days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering + +# # Log the date range +# safe_log(paste("Date range generated from", start_date, "to", end_date)) + +# return(list( +# "week" = week, +# "year" = year, +# "days_filter" = days_filter, +# "start_date" = start_date, +# "end_date" = end_date +# )) +# } + +# ============================================================================== +# #' Repair Invalid GeoJSON Geometries +# #' +# #' Fixes common geometry issues in GeoJSON/sf objects: +# #' - Degenerate vertices (duplicate points) +# #' - Self-intersecting polygons +# #' - Invalid ring orientation +# #' - Empty or NULL geometries +# #' +# #' Uses sf::st_make_valid() with buffer trick as fallback. +# #' +# #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries +# #' @return sf object with repaired geometries +# #' +# #' @details +# #' **Why this matters:** +# #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting +# #' rings from manual editing or GIS data sources. These cause errors when using +# #' S2 geometry (strict validation) during cropping operations. +# #' +# #' **Repair strategy (priority order):** +# #' 1. Try st_make_valid() - GEOS-based repair (most reliable) +# #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity +# #' 3. 
Last resort: Silently keep original if repair fails +# #' +# #' @examples +# #' \dontrun{ +# #' fields <- st_read("pivot.geojson") +# #' fields_fixed <- repair_geojson_geometries(fields) +# #' cat(paste("Fixed geometries: before=", +# #' nrow(fields[!st_is_valid(fields), ]), +# #' ", after=", +# #' nrow(fields_fixed[!st_is_valid(fields_fixed), ]))) +# #' } +# #' +# repair_geojson_geometries <- function(sf_object) { +# if (!inherits(sf_object, "sf")) { +# stop("Input must be an sf (Simple Features) object") +# } + +# # Count invalid geometries BEFORE repair +# invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE) + +# if (invalid_before == 0) { +# safe_log("All geometries already valid - no repair needed", "INFO") +# return(sf_object) +# } + +# safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING") + +# # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS) +# # This handles degenerate vertices, self-intersections, invalid rings while preserving all features +# repaired <- tryCatch({ +# # st_make_valid() on entire sf object preserves all features and attributes +# repaired_geom <- sf::st_make_valid(sf_object) + +# # Verify we still have the same number of rows +# if (nrow(repaired_geom) != nrow(sf_object)) { +# warning("st_make_valid() changed number of features - attempting row-wise repair") + +# # Fallback: Repair row-by-row to maintain original structure +# repaired_geom <- sf_object +# for (i in seq_len(nrow(sf_object))) { +# tryCatch({ +# if (!sf::st_is_valid(sf_object[i, ])) { +# repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ]) +# } +# }, error = function(e) { +# safe_log(paste("Could not repair row", i, "-", e$message), "WARNING") +# }) +# } +# } + +# safe_log("✓ st_make_valid() successfully repaired geometries", "INFO") +# repaired_geom +# }, error = function(e) { +# safe_log(paste("st_make_valid() failed:", e$message), "WARNING") +# NULL +# }) + +# # If repair failed, keep 
original +# if (is.null(repaired)) { +# safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"), +# "WARNING") +# return(sf_object) +# } + +# # Count invalid geometries AFTER repair +# invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE) +# safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO") + +# return(repaired) +# } + +# ============================================================================== +# # END 00_COMMON_UTILS.R +# ============================================================================== diff --git a/r_app/10_create_master_grid_and_split_tiffs.R b/r_app/10_create_master_grid_and_split_tiffs.R deleted file mode 100644 index fdab65c..0000000 --- a/r_app/10_create_master_grid_and_split_tiffs.R +++ /dev/null @@ -1,499 +0,0 @@ -#' Combined: Create master grid and split TIFFs into tiles -#' ==================================================================== -#' -#' Purpose: -#' 1. Check all daily TIFFs for matching extents -#' 2. Create master 5×5 grid covering all TIFFs -#' 3. Split each daily TIFF into 25 tiles using the master grid -#' 4. 
Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif -#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18 - - -library(terra) -library(sf) - -# ============================================================================ -# CONFIGURATION & COMMAND-LINE ARGUMENTS -# ============================================================================ - -# Parse command-line arguments for date filtering -args <- commandArgs(trailingOnly = TRUE) - -# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17 -start_date <- NULL -end_date <- NULL - -if (length(args) >= 1) { - start_date <- as.Date(args[1]) - cat("Filtering: start date =", as.character(start_date), "\n") -} - -if (length(args) >= 2) { - end_date <- as.Date(args[2]) - cat("Filtering: end date =", as.character(end_date), "\n") -} - -PROJECT <- "angata" -TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b") - -# GRID SIZE CONFIGURATION - Change this to use different grid sizes -# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc. -# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc. 
-GRID_NROWS <- 5 -GRID_NCOLS <- 5 - -# Construct grid-specific subfolder path -GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS) -OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL) - -# Load field boundaries for overlap checking -GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson") - -cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "") -cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") - -# ============================================================================ -# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID -# ============================================================================ - -cat("\n[PART 1] Creating Master Grid\n") - -# Load field boundaries for overlap checking -cat("\n[1] Checking for existing master grid...\n") - -# Check if master grid already exists -MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")) - -if (file.exists(MASTER_GRID_PATH)) { - cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "") - master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE) - field_boundaries_sf <- NULL # No need to load pivot.geojson - field_boundaries_vect <- NULL - - cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "") - -} else { - # No existing grid - need to create one from pivot.geojson - cat(" No existing grid found. 
Creating new one from pivot.geojson...\n") - - if (!file.exists(GEOJSON_PATH)) { - stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n", - "Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ", - "from the same directory as a previous successful run (grid already exists).") - } - - field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE) - field_boundaries_vect <- terra::vect(GEOJSON_PATH) - - cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "") -} - -# Try to find a name column (only if field_boundaries_sf exists) -if (!is.null(field_boundaries_sf)) { - field_names <- NA - if ("name" %in% names(field_boundaries_sf)) { - field_names <- field_boundaries_sf$name - } else if ("field" %in% names(field_boundaries_sf)) { - field_names <- field_boundaries_sf$field - } else if ("field_name" %in% names(field_boundaries_sf)) { - field_names <- field_boundaries_sf$field_name - } else { - field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices - } - - cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "") -} - -# Helper function: Check if a tile overlaps with any field (simple bbox overlap) -tile_overlaps_fields <- function(tile_extent, field_geoms) { - tryCatch({ - # Simple bounding box overlap test - no complex geometry operations - # Two boxes overlap if: NOT (box1.xmax < box2.xmin OR box1.xmin > box2.xmax OR - # box1.ymax < box2.ymin OR box1.ymin > box2.ymax) - - # For each field geometry, check if it overlaps with tile bbox - for (i in seq_len(length(field_geoms))) { - # Skip empty geometries - if (st_is_empty(field_geoms[i])) { - next - } - - # Get field bbox - field_bbox <- st_bbox(field_geoms[i]) - - # Check bbox overlap (simple coordinate comparison) - x_overlap <- !(tile_extent$xmax < field_bbox$xmin || tile_extent$xmin > field_bbox$xmax) - y_overlap <- !(tile_extent$ymax < field_bbox$ymin || tile_extent$ymin > field_bbox$ymax) - - if (x_overlap && y_overlap) { - return(TRUE) # Found 
overlap! - } - } - - return(FALSE) # No overlap found - - }, error = function(e) { - cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "") - return(TRUE) # Default to including tile if there's an error - }) -} - -cat("\n[2] Checking TIFF extents...\n") - -tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE) -tiff_files <- sort(tiff_files) - -# Filter by date range if specified -if (!is.null(start_date) || !is.null(end_date)) { - cat("\nApplying date filter...\n") - - file_dates <- as.Date(sub("\\.tif$", "", tiff_files)) - - if (!is.null(start_date) && !is.null(end_date)) { - keep_idx <- file_dates >= start_date & file_dates <= end_date - cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "") - } else if (!is.null(start_date)) { - keep_idx <- file_dates >= start_date - cat(" From: ", as.character(start_date), "\n", sep = "") - } else { - keep_idx <- file_dates <= end_date - cat(" Until: ", as.character(end_date), "\n", sep = "") - } - - tiff_files <- tiff_files[keep_idx] - cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "") -} - -if (length(tiff_files) == 0) { - stop("No TIFF files found in ", TIFF_FOLDER) -} - -cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "") -cat(" Checking extents... 
(this may take a while)\n") - -# Load all extents - ONE TIME, upfront -extents <- list() -for (i in seq_along(tiff_files)) { - tiff_path <- file.path(TIFF_FOLDER, tiff_files[i]) - raster <- terra::rast(tiff_path) - ext <- terra::ext(raster) - extents[[i]] <- ext - - # Progress indicator every 50 files - if (i %% 50 == 0) { - cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "") - } -} - -cat(" ✓ All extents loaded\n") - -# Check if all extents match -cat("\n[3] Comparing extents...\n") - -tolerance <- 1e-8 -all_match <- TRUE -first_ext <- extents[[1]] - -for (i in 2:length(extents)) { - curr_ext <- extents[[i]] - match <- ( - abs(curr_ext$xmin - first_ext$xmin) < tolerance && - abs(curr_ext$xmax - first_ext$xmax) < tolerance && - abs(curr_ext$ymin - first_ext$ymin) < tolerance && - abs(curr_ext$ymax - first_ext$ymax) < tolerance - ) - if (!match) { - all_match <- FALSE - cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "") - cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ", - "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "") - cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ", - "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "") - } -} - -if (all_match) { - cat(" ✓ All TIFF extents MATCH perfectly!\n") -} else { - cat(" ⚠️ Extents differ - creating master extent covering all\n") -} - -# Create master extent -cat("\n[4] Creating master extent...\n") - -master_xmin <- min(sapply(extents, function(e) e$xmin)) -master_xmax <- max(sapply(extents, function(e) e$xmax)) -master_ymin <- min(sapply(extents, function(e) e$ymin)) -master_ymax <- max(sapply(extents, function(e) e$ymax)) - -x_range_m <- (master_xmax - master_xmin) * 111320 -y_range_m <- (master_ymax - master_ymin) * 111320 - -cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ", - "Y [", round(master_ymin, 6), 
", ", round(master_ymax, 6), "]\n", sep = "") -cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "") - -# Auto-determine grid size based on ROI dimensions -if (x_range_m < 10000 && y_range_m < 10000) { - cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n") - GRID_NROWS <- 1 - GRID_NCOLS <- 1 -} else { - cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n") - GRID_NROWS <- 5 - GRID_NCOLS <- 5 -} - -N_TILES <- GRID_NROWS * GRID_NCOLS - -# Check if master grid already exists -cat("\n[5] Checking if master grid exists...\n") - -master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")) - -if (file.exists(master_grid_file)) { - cat(" ✓ Master grid exists! Loading existing grid...\n") - master_grid_sf <- st_read(master_grid_file, quiet = TRUE) - master_grid_vect <- terra::vect(master_grid_file) - cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "") -} else { - cat(" Grid does not exist. 
Creating new master grid...\n") - - # Create 5×5 grid - cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "") - - master_bbox <- st_bbox(c( - xmin = master_xmin, - xmax = master_xmax, - ymin = master_ymin, - ymax = master_ymax - ), crs = 4326) - - bbox_sf <- st_as_sfc(master_bbox) - - master_grid <- st_make_grid( - bbox_sf, - n = c(GRID_NCOLS, GRID_NROWS), - what = "polygons" - ) - - master_grid_sf <- st_sf( - tile_id = sprintf("%02d", 1:length(master_grid)), - geometry = master_grid - ) - - cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "") - - # Convert to SpatVector for use in makeTiles - master_grid_vect <- terra::vect(master_grid_sf) - - # Save master grid - if (!dir.exists(OUTPUT_FOLDER)) { - dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE) - } - st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE) - cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") -} - -# ============================================================================ -# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES) -# ============================================================================ - -cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n") - -# If grid was loaded from file, it's already filtered. Skip filtering. 
-if (!file.exists(MASTER_GRID_PATH)) { - cat("\n[7] Filtering master grid to only overlapping tiles...\n") - - # Check which tiles overlap with any field - overlapping_tile_indices <- c() - for (tile_idx in 1:nrow(master_grid_sf)) { - tile_geom <- master_grid_sf[tile_idx, ] - - # Check overlap with any field - if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) { - overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx) - } - } - - cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "") - cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "") - - # Create filtered grid with only overlapping tiles - filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ] - filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices) -} else { - cat("\n[7] Using pre-filtered grid (already loaded from file)...\n") - # Grid was already loaded - it's already filtered - filtered_grid_sf <- master_grid_sf -} - -# Convert to SpatVector for makeTiles -filtered_grid_vect <- terra::vect(filtered_grid_sf) - -cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "") - -# ============================================================================ -# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE) -# ============================================================================ - -cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n") -cat("\n[8] Processing each date independently...\n") -cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n") - -total_tiles_created <- 0 -dates_skipped <- 0 -dates_processed <- 0 - -for (file_idx in seq_along(tiff_files)) { - tiff_file <- tiff_files[file_idx] - date_str <- gsub("\\.tif$", "", tiff_file) - - # Create date-specific output folder - date_output_folder <- file.path(OUTPUT_FOLDER, date_str) - - # CHECK: 
Skip if date already processed (RESUME-SAFE) - if (dir.exists(date_output_folder)) { - existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$") - existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)] - - if (length(existing_tiles) > 0) { - cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str, - " (", length(existing_tiles), " tiles already exist)\n", sep = "") - dates_skipped <- dates_skipped + 1 - next # Skip this date - } - } - - cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "") - dates_processed <- dates_processed + 1 - - # Load TIFF for this date only - tiff_path <- file.path(TIFF_FOLDER, tiff_file) - raster <- terra::rast(tiff_path) - - dims <- dim(raster) - cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "") - - # Create date-specific output folder - if (!dir.exists(date_output_folder)) { - dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE) - } - - cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "") - - # Use makeTiles with FILTERED grid (only overlapping tiles) - tiles_list <- terra::makeTiles( - x = raster, - y = filtered_grid_vect, - filename = file.path(date_output_folder, "tile.tif"), - overwrite = TRUE - ) - - # Rename tiles to [DATE]_[TILE_ID].tif - for (tile_idx in seq_along(tiles_list)) { - source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif")) - tile_id <- filtered_grid_sf$tile_id[tile_idx] - final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif")) - - if (file.exists(source_file)) { - file.rename(source_file, final_file) - } - } - - cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "") - total_tiles_created <- total_tiles_created + length(tiles_list) -} - -# ============================================================================ -# VERIFICATION -# ============================================================================ - -cat("\n[9] Verifying 
output...\n") - -# Count tiles per date folder -date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE) -date_folders <- sort(date_folders[date_folders != "."]) - -total_tile_files <- 0 -for (date_folder in date_folders) { - tiles_in_folder <- list.files(file.path(OUTPUT_FOLDER, date_folder), - pattern = "\\.tif$") - tiles_in_folder <- tiles_in_folder[!grepl("master_grid", tiles_in_folder)] - total_tile_files <- total_tile_files + length(tiles_in_folder) - cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "") -} - -# ============================================================================ -# SUMMARY -# ============================================================================ - -cat("\n\n========== SUMMARY ==========\n") - -cat("\nGrid Configuration:\n") -cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "") -cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") -cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") - -cat("\nField Filtering:\n") -cat(" - Field boundaries loaded from pivot.geojson\n") -cat(" - Only overlapping tiles created (empty tiles deleted)\n") -cat(" - Significant storage savings for sparse fields!\n") - -cat("\nProcessing Summary:\n") -cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "") -cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "") -cat(" - Dates processed: ", dates_processed, "\n", sep = "") -cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "") -if (dates_processed > 0) { - avg_tiles_per_date <- total_tiles_created / dates_processed - cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "") -} - -cat("\nDirectory Structure:\n") -cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "") -cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "") -cat(" ├── master_grid_", GRID_SIZE_LABEL, 
".geojson\n", sep = "") -cat(" ├── 2024-01-15/\n") -cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n") -cat(" │ ├── 2024-01-15_05.tif\n") -cat(" │ └── ...\n") -cat(" ├── 2024-01-16/\n") -cat(" │ └── ...\n") -cat(" └── ...\n") - -cat("\n⭐ Key Benefits:\n") -cat(" ✓ Overlap-filtered: No wasted empty tiles\n") -cat(" ✓ Skip existing dates: Resume-safe, idempotent\n") -cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n") -cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n") - -# ============================================================================ -# WRITE TILING CONFIGURATION METADATA -# ============================================================================ -# This metadata file is read by parameters_project.R to determine mosaic mode -# It allows script 40 to know what script 10 decided without re-computing - -cat("\n[10] Writing tiling configuration metadata...\n") - -config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json") -config_json <- paste0( - '{\n', - ' "project": "', PROJECT, '",\n', - ' "has_tiles": ', tolower(N_TILES > 1), ',\n', - ' "grid_size": "', GRID_SIZE_LABEL, '",\n', - ' "grid_rows": ', GRID_NROWS, ',\n', - ' "grid_cols": ', GRID_NCOLS, ',\n', - ' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n', - ' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n', - ' "created_date": "', Sys.Date(), '",\n', - ' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n', - '}\n' -) - -writeLines(config_json, config_file) -cat(" ✓ Metadata saved to: tiling_config.json\n") -cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "") -cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "") - -cat("\n✓ Script complete!\n") diff --git a/r_app/10_create_per_field_tiffs.R b/r_app/10_create_per_field_tiffs.R new file mode 100644 index 0000000..8b4cd08 --- /dev/null +++ b/r_app/10_create_per_field_tiffs.R @@ -0,0 +1,165 @@ +# ============================================================================ +# SCRIPT 10: 
Create Per-Field TIFFs (Data Organization & Splitting) +# ============================================================================ +# PURPOSE: +# Split full-farm satellite TIFFs into per-field file structure. Supports +# two phases: legacy data migration and ongoing new downloads. Transforms +# single large-file architecture into per-field directory structure for +# clean aggregation in downstream scripts (Script 20, 40, 80/90). +# +# INPUT DATA: +# - Source: laravel_app/storage/app/{project}/merged_tif/ or merged_final_tif/ +# - Format: GeoTIFF (4-band RGB+NIR or 5-band with CI) +# - Naming: {YYYY-MM-DD}.tif (full farm mosaic) +# +# OUTPUT DATA: +# - Destination: laravel_app/storage/app/{project}/field_tiles/ +# - Format: GeoTIFF (4-band RGB+NIR, same as input) +# - Structure: field_tiles/{FIELD}/{YYYY-MM-DD}.tif +# - Naming: Per-field GeoTIFFs organized by field and date +# +# USAGE: +# Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset] +# +# Example (Windows PowerShell): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7 +# +# PARAMETERS: +# - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata) +# - end_date: End date for processing (YYYY-MM-DD format, default: today) +# - offset: Days to look back (numeric, default: 7) +# +# CLIENT TYPES: +# - cane_supply (ANGATA): Yes - primary data organization script +# - agronomic_support (AURA): Yes - supports field-level analysis +# +# DEPENDENCIES: +# - Packages: terra, sf, tidyverse +# - Utils files: parameters_project.R, 00_common_utils.R, 10_create_per_field_tiffs_utils.R +# - External data: Field boundaries (pivot.geojson) +# - Data directories: merged_tif/, field_tiles/ (created if missing) +# +# NOTES: +# - Supports two-phase migration: legacy (merged_final_tif) and ongoing (merged_tif) +# - Automatically detects and handles field boundaries from pivot.geojson +# - Geometry validation and repair 
applied via st_make_valid() +# - Critical for downstream Scripts 20, 40, and KPI calculations +# - Creates per-field structure that enables efficient per-field processing +# +# RELATED ISSUES: +# SC-111: Script 10 refactoring and geometry repair +# SC-112: Utilities restructuring (uses 00_common_utils.R) +# +# ============================================================================ + +# Spatial data handling +suppressPackageStartupMessages({ + +library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries) +library(sf) # For spatial operations (reading field boundaries GeoJSON, masking) +library(here) # For relative path resolution +}) +# ============================================================================== +# MAIN PROCESSING FUNCTION +# ============================================================================== + +main <- function() { + # STEP 1: Set working directory to project root (smartcane/) + # This ensures all relative paths resolve correctly + if (basename(getwd()) == "r_app") { + setwd("..") + } + + # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R) + args <- commandArgs(trailingOnly = TRUE) + + # Parse arguments: [project] [end_date] [offset] + project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata" + end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date() + offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7 + + # Make variables available to sourced files (they execute in global scope) + assign("project_dir", project_dir, envir = .GlobalEnv) + assign("end_date", end_date_arg, envir = .GlobalEnv) + assign("offset", offset_arg, envir = .GlobalEnv) + + # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined) + # Load parameters_project.R (provides safe_log, setup_project_directories, etc.) 
+ tryCatch({ + source("r_app/parameters_project.R") + }, error = function(e) { + cat(sprintf("Error loading parameters_project.R: %s\n", e$message)) + stop(e) + }) + + # Load Script 10-specific utilities + tryCatch({ + source("r_app/10_create_per_field_tiffs_utils.R") + }, error = function(e) { + cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message)) + stop(e) + }) + + # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign()) + # These control which dates Script 10 processes from merged_tif/ + # Window: end_date - offset days to end_date + # Always coerce to correct types to avoid issues with lingering/inherited values + if (!exists("end_date") || !inherits(end_date, "Date")) { + end_date <- Sys.Date() + safe_log(paste("Using default end_date:", end_date), "INFO") + } + if (!exists("offset") || !is.numeric(offset)) { + offset <- 7 + safe_log(paste("Using default offset:", offset, "days"), "INFO") + } + + # Ensure offset is numeric (in case it came in as a character string from environment) + if (is.character(offset)) { + offset <- as.numeric(offset) + } + + # Calculate date window for processing + start_date <- end_date - offset + date_window <- seq(start_date, end_date, by = "day") + date_window_str <- format(date_window, "%Y-%m-%d") + safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO") + + # STEP 5: Load centralized path structure (creates all directories automatically) + paths <- setup_project_directories(project_dir) + + safe_log(paste("Project:", project_dir)) + safe_log(paste("Base path:", paths$laravel_storage_dir)) + safe_log(paste("Data dir:", paths$data_dir)) + + # Load field boundaries using data_dir (not field_boundaries_path) + # load_field_boundaries() expects a directory and builds the file path internally + fields_data <- load_field_boundaries(paths$data_dir) + fields <- fields_data$field_boundaries_sf + + # Define input 
and output directories (from centralized paths) + merged_tif_dir <- paths$merged_tif_folder + field_tiles_dir <- paths$field_tiles_dir + field_tiles_ci_dir <- paths$field_tiles_ci_dir + + # PHASE 1: Process new downloads (always runs) + # Pass field_tiles_ci_dir so it can skip dates already migrated + # Also pass end_date and offset so only dates in window are processed + process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir, + end_date = end_date, offset = offset) + + safe_log("\n========================================", "INFO") + safe_log("FINAL SUMMARY", "INFO") + safe_log("========================================", "INFO") + safe_log(paste("Processing: created =", process_result$total_created, + ", skipped =", process_result$total_skipped, + ", errors =", process_result$total_errors), "INFO") + safe_log("Script 10 complete", "INFO") + safe_log("========================================\n", "INFO") + + quit(status = 0) +} + +# Execute main if called from command line +if (sys.nframe() == 0) { + main() +} diff --git a/r_app/10_create_per_field_tiffs_utils.R b/r_app/10_create_per_field_tiffs_utils.R new file mode 100644 index 0000000..38719e5 --- /dev/null +++ b/r_app/10_create_per_field_tiffs_utils.R @@ -0,0 +1,306 @@ +# ============================================================================== +# 10_CREATE_PER_FIELD_TIFFS_UTILS.R +# ============================================================================== +# UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation +# +# PURPOSE: +# Extracted helper functions specific to Script 10 (per-field TIFF splitting). +# These functions are domain-specific (not generic) and handle raster cropping +# and batch processing of satellite imagery for per-field storage. +# +# FUNCTIONS: +# 1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries +# 2. 
process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure
+#
+# DEPENDENCIES:
+# - terra (raster operations)
+# - sf (geometry operations)
+# - 00_common_utils.R (logging via safe_log())
+#
+# USAGE:
+# source(here::here("r_app", "10_create_per_field_tiffs_utils.R"))
+#
+# ==============================================================================
+
+library(terra)
+library(sf)
+
+# ==============================================================================
+#' Crop Single TIFF to Field Boundaries
+#'
+#' Loads a single satellite TIFF, finds overlapping field boundaries, and writes
+#' cropped per-field versions to output directory. Handles CRS reprojection and
+#' idempotent file creation (skips existing files).
+#'
+#' @param tif_path Character. Absolute path to source TIFF file (4-band or 5-band).
+#' @param tif_date Character. Date string (YYYY-MM-DD) for naming output files.
+#' @param fields sf object. GeoDataFrame of field boundaries with a 'field' column.
+#' @param output_base_dir Character. Base directory where per-field TIFFs are stored
+#' (e.g., "field_tiles" or "field_tiles_CI").
+#'
+#' @return List with elements:
+#' - created: Integer. Number of field TIFFs created
+#' - skipped: Integer. Number of files that already existed
+#' - errors: Integer. Number of fields that failed to crop
+#'
+#' @details
+#' **Output Structure:**
+#' Creates files at: {output_base_dir}/{field_name}/{tif_date}.tif
+#'
+#' **CRS Handling:**
+#' Automatically reprojects field boundaries to match raster CRS before cropping.
+#'
+#' **Idempotency:**
+#' If output file already exists, skips writing (counted as "skipped").
+#' Safe to re-run without duplicate file creation.
+#' +#' @examples +#' \dontrun{ +#' fields <- load_field_boundaries("path/to/pivot.geojson") +#' result <- crop_tiff_to_fields( +#' tif_path = "merged_tif/2024-01-15.tif", +#' tif_date = "2024-01-15", +#' fields = fields, +#' output_base_dir = "field_tiles" +#' ) +#' cat(sprintf("Created: %d, Skipped: %d, Errors: %d\n", +#' result$created, result$skipped, result$errors)) +#' } +#' +crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { + + created <- 0 + skipped <- 0 + errors <- 0 + + # Load raster + if (!file.exists(tif_path)) { + safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR") + return(list(created = 0, skipped = 0, errors = 1)) + } + + rast <- tryCatch({ + rast(tif_path) + }, error = function(e) { + safe_log(paste("ERROR loading raster:", e$message), "ERROR") + return(NULL) + }) + + if (is.null(rast)) { + return(list(created = 0, skipped = 0, errors = 1)) + } + + # Create raster bounding box in raster CRS + rast_bbox <- st_as_sfc(st_bbox(rast)) + st_crs(rast_bbox) <- st_crs(rast) + + # Reproject fields to match raster CRS + fields_reprojected <- st_transform(fields, st_crs(rast_bbox)) + + # Find which fields intersect this raster (CRITICAL: raster bbox first, then fields) + overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE) + overlapping_indices <- unique(unlist(overlapping_indices)) + + if (length(overlapping_indices) == 0) { + safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO") + return(list(created = 0, skipped = 0, errors = 0)) + } + + # Process each overlapping field + for (field_idx in overlapping_indices) { + field_name <- fields$field[field_idx] + field_geom <- fields_reprojected[field_idx, ] + + # Create field directory + field_dir <- file.path(output_base_dir, field_name) + if (!dir.exists(field_dir)) { + dir.create(field_dir, recursive = TRUE, showWarnings = FALSE) + } + + # Output file path + output_path <- file.path(field_dir, paste0(tif_date, ".tif")) + + # 
Check if file already exists (idempotent)
+ if (file.exists(output_path)) {
+ skipped <- skipped + 1
+ next
+ }
+
+ # Crop raster to field boundary
+ tryCatch({
+ field_rast <- crop(rast, field_geom)
+ writeRaster(field_rast, output_path, overwrite = TRUE)
+ created <- created + 1
+ }, error = function(e) {
+ safe_log(paste("ERROR cropping field", field_name, ":", e$message), "ERROR")
+ errors <<- errors + 1
+ })
+ }
+
+ return(list(created = created, skipped = skipped, errors = errors))
+}
+
+# ==============================================================================
+#' Process New Merged TIFFs into Per-Field Structure
+#'
+#' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory.
+#' Crops each TIFF to field boundaries and stores results in per-field subdirectories.
+#' Supports migration mode: skips dates already processed and migrated to field_tiles_CI/.
+#'
+#' @param merged_tif_dir Character. Source directory containing raw merged TIFFs
+#' (e.g., "merged_tif/").
+#' @param field_tiles_dir Character. Target directory for per-field TIFFs
+#' (e.g., "field_tiles/").
+#' @param fields sf object. GeoDataFrame of field boundaries with a 'field' column.
+#' @param field_tiles_ci_dir Character. Optional. Directory where migrated CI-calculated
+#' TIFFs are stored. If provided, skips dates
+#' already processed and moved to field_tiles_CI/.
+#' Default: NULL (process all dates).
+#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD).
+#' Default: NULL (process all available dates).
+#' @param offset Integer. Optional. Number of days to look back from end_date.
+#' Only used if end_date is also provided.
+#' Default: NULL (process all available dates).
+#'
+#' @return List with elements:
+#' - total_created: Integer. Total field TIFFs created across all dates
+#' - total_skipped: Integer. Total files that already existed
+#' - total_errors: Integer.
Total cropping errors across all dates +#' +#' @details +#' **Migration Logic:** +#' When migration phase is active (field_tiles_CI/ contains CI-calculated TIFFs), +#' this function detects which dates have been migrated and skips reprocessing them. +#' This prevents redundant work during the per-field architecture transition. +#' +#' **Date Detection:** +#' Finds all files in merged_tif_dir matching pattern: YYYY-MM-DD.tif +#' +#' **Output Structure:** +#' Creates per-field subdirectories and stores: field_tiles/{FIELD}/{DATE}.tif +#' +#' @examples +#' \dontrun{ +#' fields <- load_field_boundaries("path/to/pivot.geojson") +#' result <- process_new_merged_tif( +#' merged_tif_dir = "merged_tif", +#' field_tiles_dir = "field_tiles", +#' fields = fields, +#' field_tiles_ci_dir = "field_tiles_CI" +#' ) +#' cat(sprintf("Processing complete: created=%d, skipped=%d, errors=%d\n", +#' result$total_created, result$total_skipped, result$total_errors)) +#' } +#' +process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL, + end_date = NULL, offset = NULL) { + + safe_log("\n========================================", "INFO") + safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO") + safe_log("========================================", "INFO") + + # Check if download directory exists + if (!dir.exists(merged_tif_dir)) { + safe_log("No merged_tif/ directory found - no new data to process", "WARNING") + return(list(total_created = 0, total_skipped = 0, total_errors = 0)) + } + + # Create output directory + if (!dir.exists(field_tiles_dir)) { + dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE) + } + + # Find all date-pattern TIFFs in root of merged_tif + tiff_files <- list.files( + merged_tif_dir, + pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$", + full.names = TRUE + ) + + # FILTER by date window if end_date and offset provided + if (!is.null(end_date) && !is.null(offset)) { + start_date <- end_date - offset + date_range 
<- seq(start_date, end_date, by = "day") + date_range_str <- format(date_range, "%Y-%m-%d") + + # Extract dates from filenames and filter + tiff_dates <- gsub("\\.tif$", "", basename(tiff_files)) + tiff_files <- tiff_files[tiff_dates %in% date_range_str] + + safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO") + } + + safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO") + + if (length(tiff_files) == 0) { + safe_log("No new TIFFs found - nothing to process", "WARNING") + return(list(total_created = 0, total_skipped = 0, total_errors = 0)) + } + + # Process each new TIFF + total_created <- 0 + total_skipped <- 0 + total_errors <- 0 + + for (tif_path in tiff_files) { + tif_date <- gsub("\\.tif$", "", basename(tif_path)) + + # CHECK 1: Skip if this date was already migrated to field_tiles_CI/ + # (This means Script 20 already processed it and extracted RDS) + if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) { + # Check if ANY field has this date in field_tiles_CI/ + date_migrated <- FALSE + + # Sample check: look for date in field_tiles_CI/*/DATE.tif + sample_field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE) + for (field_dir in sample_field_dirs) { + potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) + if (file.exists(potential_file)) { + date_migrated <- TRUE + break + } + } + + if (date_migrated) { + safe_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)"), "INFO") + total_skipped <- total_skipped + 1 + next + } + } + + # CHECK 2: Skip if this date already exists in field_tiles/ + # (means this date has already been processed through Script 10) + if (dir.exists(field_tiles_dir)) { + date_exists_in_field_tiles <- FALSE + + # Check if ANY field directory has this date + field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + for (field_dir in field_dirs) { + 
potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) + if (file.exists(potential_file)) { + date_exists_in_field_tiles <- TRUE + break + } + } + + if (date_exists_in_field_tiles) { + safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO") + total_skipped <- total_skipped + 1 + next + } + } + + safe_log(paste("Processing:", tif_date), "INFO") + + result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) + total_created <- total_created + result$created + total_skipped <- total_skipped + result$skipped + total_errors <- total_errors + result$errors + } + + safe_log(paste("Processing complete: created =", total_created, + ", skipped =", total_skipped, ", errors =", total_errors), "INFO") + + return(list(total_created = total_created, total_skipped = total_skipped, + total_errors = total_errors)) +} diff --git a/r_app/20_ci_extraction.R b/r_app/20_ci_extraction.R deleted file mode 100644 index ab82188..0000000 --- a/r_app/20_ci_extraction.R +++ /dev/null @@ -1,202 +0,0 @@ -# CI_EXTRACTION.R -# ============== -# This script processes satellite imagery to extract Canopy Index (CI) values for agricultural fields. -# It handles image processing, masking, and extraction of statistics by field/sub-field. -# Supports both 4-band and 8-band PlanetScope data with automatic band detection and cloud masking. 
-# -# Usage: Rscript 02_ci_extraction.R [end_date] [offset] [project_dir] [data_source] -# - end_date: End date for processing (YYYY-MM-DD format) -# - offset: Number of days to look back from end_date -# - project_dir: Project directory name (e.g., "angata", "aura", "chemba") -# - data_source: Data source directory - "merged_tif_8b" (default) or "merged_tif" (4-band) or "merged_final_tif" -# If tiles exist (daily_tiles_split/), they are used automatically -# -# Examples: -# # Angata 8-band data (with UDM cloud masking) -# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b -# -# # Aura 4-band data -# Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif -# -# # Auto-detects and uses tiles if available: -# Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists) - -# 1. Load required packages -# ----------------------- -suppressPackageStartupMessages({ - library(sf) - library(terra) - library(tidyverse) - library(lubridate) - library(readxl) - library(here) - library(furrr) - library(future) -}) - -# 2. Process command line arguments -# ------------------------------ -main <- function() { - # Capture command line arguments - args <- commandArgs(trailingOnly = TRUE) - - # Process end_date argument - if (length(args) >= 1 && !is.na(args[1])) { - end_date <- as.Date(args[1]) - if (is.na(end_date)) { - warning("Invalid end_date provided. Using default (current date).") - end_date <- Sys.Date() - #end_date <- "2023-10-01" - } - } else { - end_date <- Sys.Date() - #end_date <- "2023-10-01" - } - - # Process offset argument - if (length(args) >= 2 && !is.na(args[2])) { - offset <- as.numeric(args[2]) - if (is.na(offset) || offset <= 0) { - warning("Invalid offset provided. 
Using default (7 days).") - offset <- 7 - } - } else { - offset <- 7 - } - - # Process project_dir argument - if (length(args) >= 3 && !is.na(args[3])) { - project_dir <- as.character(args[3]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - project_dir <- "angata" # Changed default from "aura" to "esa" - } - - # Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif) - if (length(args) >= 4 && !is.na(args[4])) { - data_source <- as.character(args[4]) - # Validate data_source is a recognized option - if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) { - warning(paste("Data source", data_source, "not in standard list. Using as-is.")) - } - } else if (exists("data_source", envir = .GlobalEnv)) { - data_source <- get("data_source", envir = .GlobalEnv) - } else { - data_source <- "merged_tif_8b" # Default to 8-band (newer data with cloud masking) - } - - # Make project_dir and data_source available globally - assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) - - cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n", - project_dir, format(end_date, "%Y-%m-%d"), offset, data_source)) - - # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction) - ci_extraction_script <- TRUE - assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv) - - # 3. 
Initialize project configuration - # -------------------------------- - new_project_question <- TRUE - - cat("[DEBUG] Attempting to source r_app/parameters_project.R\n") - tryCatch({ - source("r_app/parameters_project.R") - cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n") - }, error = function(e) { - cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n") - stop(e) - }) - - cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n") - tryCatch({ - source("r_app/20_ci_extraction_utils.R") - cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n") - }, error = function(e) { - cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n") - stop(e) - }) - - - # 4. Generate date list for processing - # --------------------------------- - dates <- date_list(end_date, 7) - log_message(paste("Processing data for week", dates$week, "of", dates$year)) - - # 5. Find and filter raster files by date - with grid size detection - # ----------------------------------- - log_message("Searching for raster files") - - # Check if tiles exist (Script 01 output) - detect grid size dynamically - tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split") - - # Detect grid size from daily_tiles_split folder structure - # Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc. 
- grid_size <- NA - if (dir.exists(tiles_split_base)) { - subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) - # Look for grid size patterns like "5x5", "10x10", "20x20" - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - grid_size <- grid_patterns[1] # Use first grid size found - log_message(paste("Detected grid size:", grid_size)) - } - } - - # Construct tile folder path with grid size - if (!is.na(grid_size)) { - tile_folder <- file.path(tiles_split_base, grid_size) - } else { - tile_folder <- tiles_split_base - } - - use_tiles <- dir.exists(tile_folder) - - # Make grid_size available globally for other functions - assign("grid_size", grid_size, envir = .GlobalEnv) - - tryCatch({ - if (use_tiles) { - # Use tile-based processing - log_message(paste("Tile folder detected at", tile_folder)) - log_message("Using tile-based CI extraction") - - # Call the tile-based extraction function - process_ci_values_from_tiles( - dates = dates, - tile_folder = tile_folder, - field_boundaries = field_boundaries, - field_boundaries_sf = field_boundaries_sf, - daily_CI_vals_dir = daily_CI_vals_dir, - cumulative_CI_vals_dir = cumulative_CI_vals_dir, - merged_final_dir = merged_final, - grid_size = grid_size - ) - - } else { - # Use legacy full-extent processing - log_message("No tiles found. 
Using legacy full-extent approach") - - # Use the existing utility function to find satellite images - existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter) - log_message(paste("Found", length(existing_files), "raster files for processing")) - - # Process raster files and create VRT - vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt) - - # Process and combine CI values - process_ci_values(dates, field_boundaries, merged_final, - field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir) - } - - }, error = function(e) { - log_message(paste("Error in main processing:", e$message), level = "ERROR") - stop(e$message) - }) -} - -if (sys.nframe() == 0) { - main() -} diff --git a/r_app/20_ci_extraction_per_field.R b/r_app/20_ci_extraction_per_field.R new file mode 100644 index 0000000..701a108 --- /dev/null +++ b/r_app/20_ci_extraction_per_field.R @@ -0,0 +1,238 @@ +# CI_EXTRACTION_PER_FIELD.R +# ========================= +# Script 20 (Refactored for Per-Field Architecture) +# +# This script reads per-field TIFFs from Script 10 output and: +# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR) +# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif +# 3. 
Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds +# +# Key differences from legacy Script 20: +# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10) +# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI) +# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics) +# - Directly extracts CI statistics per sub_field within each field +# +# Usage: +# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset] +# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7 + +suppressPackageStartupMessages({ + library(sf) + library(terra) + library(tidyverse) + library(lubridate) + library(here) +}) + +# ============================================================================= +# Main Processing +# ============================================================================= + +main <- function() { + # STEP 1: Set working directory to project root (smartcane/) + # This ensures all relative paths resolve correctly + if (basename(getwd()) == "r_app") { + setwd("..") + } + + # STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations) + # Parse command-line arguments FIRST + args <- commandArgs(trailingOnly = TRUE) + project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata" + end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date() + offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7 + + # Make project_dir available globally for parameters_project.R + assign("project_dir", project_dir, envir = .GlobalEnv) + + # Load parameters_project.R (provides safe_log, date_list, setup_project_directories, etc.) 
+ tryCatch({ + source("r_app/parameters_project.R") + }, error = function(e) { + cat(sprintf("Error loading parameters_project.R: %s\n", e$message)) + stop(e) + }) + + # Load CI extraction utilities + tryCatch({ + source("r_app/20_ci_extraction_utils.R") + }, error = function(e) { + cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message)) + stop(e) + }) + + # STEP 3: Now all utilities are loaded, proceed with script logic + safe_log(sprintf("=== Script 20: CI Extraction Per-Field ===")) + safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days", + project_dir, format(end_date, "%Y-%m-%d"), offset)) + + # Set up directory paths from parameters + setup <- setup_project_directories(project_dir) + + # Load field boundaries directly from field_boundaries_path in setup + tryCatch({ + field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE) + safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path)) + }, error = function(e) { + safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR") + stop(e) + }) + + # Get list of dates to process + # If in migration mode, dates_to_process is provided by the pipeline runner + if (exists("dates_to_process") && !is.null(dates_to_process)) { + # Migration mode: Use provided list of dates (process ALL available dates) + dates_filter <- sort(dates_to_process) + safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter))) + } else { + # Normal mode: Use 7-day offset window + dates <- date_list(end_date, offset) + dates_filter <- dates$days_filter + safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)", + dates$start_date, dates$end_date, length(dates_filter))) + } + + safe_log(sprintf("Input directory: %s", setup$field_tiles_dir)) + safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir)) + safe_log(sprintf("Output RDS directory: 
%s", setup$daily_ci_vals_dir)) + + # Process each field + if (!dir.exists(setup$field_tiles_dir)) { + safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR") + stop("Script 10 output not found. Run Script 10 first.") + } + + fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE) + fields <- fields[fields != ""] # Remove empty strings + + if (length(fields) == 0) { + safe_log("No fields found in field_tiles directory", "WARNING") + return() + } + + safe_log(sprintf("Found %d fields to process", length(fields))) + + # DEBUG: Check what paths are available in setup + safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", "))) + safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir)) + safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir)) + + # Use daily_ci_vals_dir for per-field daily CI output + # Pre-create output subdirectories for all fields + for (field in fields) { + dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE) + if (!is.null(setup$daily_ci_vals_dir)) { + dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE) + } + } + + # Process each DATE (load merged TIFF once, extract all fields from it) + total_success <- 0 + total_error <- 0 + + for (date_str in dates_filter) { + # Load the MERGED TIFF (farm-wide) ONCE for this date + input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str)) + + if (!file.exists(input_tif_merged)) { + safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str)) + total_error <- total_error + 1 + next + } + + tryCatch({ + # Load 4-band TIFF ONCE + raster_4band <- terra::rast(input_tif_merged) + safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields))) + + # Calculate CI from 4-band + ci_raster <- calc_ci_from_raster(raster_4band) + + # Create 5-band 
(R, G, B, NIR, CI) + five_band <- c(raster_4band, ci_raster) + + # Now process all fields from this single merged TIFF + fields_processed_this_date <- 0 + + for (field in fields) { + field_ci_path <- file.path(setup$field_tiles_ci_dir, field) + field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field) + + # Pre-create output directories + dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE) + dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE) + + output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str)) + output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) + + # MODE 3: Skip if both outputs already exist + if (file.exists(output_tif) && file.exists(output_rds)) { + next + } + + # MODE 2: Regeneration mode - RDS missing but CI TIFF exists + if (file.exists(output_tif) && !file.exists(output_rds)) { + tryCatch({ + extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field) + fields_processed_this_date <- fields_processed_this_date + 1 + }, error = function(e) { + # Continue to next field + }) + next + } + + # MODE 1: Normal mode - crop 5-band TIFF to field boundary and save + tryCatch({ + # Crop 5-band TIFF to field boundary + field_geom <- field_boundaries_sf %>% filter(field == !!field) + five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE) + + # Save 5-band field TIFF + terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE) + + # Extract CI statistics by sub_field (from cropped CI raster) + ci_cropped <- five_band_cropped[[5]] # 5th band is CI + ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field) + + # Save RDS + if (!is.null(ci_stats) && nrow(ci_stats) > 0) { + saveRDS(ci_stats, output_rds) + } + + fields_processed_this_date <- fields_processed_this_date + 1 + + }, error = function(e) { + # Error in individual field, continue to next + safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING") + 
}) + } + + # Increment success counter if at least one field succeeded + if (fields_processed_this_date > 0) { + total_success <- total_success + 1 + safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date)) + } + + }, error = function(e) { + total_error <<- total_error + 1 + safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR") + }) + } + + # Summary + safe_log(sprintf("\n=== Processing Complete ===")) + safe_log(sprintf("Successfully processed: %d", total_success)) + safe_log(sprintf("Errors encountered: %d", total_error)) + + if (total_success > 0) { + safe_log("Output files created in:") + safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir)) + safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir)) + } +} + +# Execute main if called from command line +if (sys.nframe() == 0) { + main() +} diff --git a/r_app/20_ci_extraction_utils.R b/r_app/20_ci_extraction_utils.R index 5efeb5e..f8a88c5 100644 --- a/r_app/20_ci_extraction_utils.R +++ b/r_app/20_ci_extraction_utils.R @@ -6,24 +6,10 @@ # # Parallel Processing: Tile-based extraction uses furrr::future_map to process # multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores) - -#' Safe logging function that works whether log_message exists or not -#' -#' @param message The message to log -#' @param level The log level (default: "INFO") -#' @return NULL (used for side effects) -#' -safe_log <- function(message, level = "INFO") { - if (exists("log_message")) { - log_message(message, level) - } else { - if (level %in% c("ERROR", "WARNING")) { - warning(message) - } else { - message(message) - } - } -} +# +# Per-Field Functions (Script 20): +# - calc_ci_from_raster(): Calculate CI from 4-band raster (Chlorophyll Index formula: NIR/Green - 1) +# - extract_ci_by_subfield(): Extract per-sub_field CI statistics from raster #' Generate a sequence of dates for processing #' @@ -216,9 +202,10 @@ create_mask_and_crop <- 
function(file, field_boundaries, merged_final_dir) { names(blue_band) <- "Blue" names(nir_band) <- "NIR" - # Calculate Canopy Index from Red, Green, NIR - # CI = (NIR - Red) / (NIR + Red) is a common formulation - # But using NIR/Green - 1 is also valid and more sensitive to green vegetation + # Calculate Canopy Index from Green and NIR + # *** CRITICAL: Use CHLOROPHYLL INDEX formula ONLY *** + # CORRECT: CI = NIR / Green - 1 (ranges ~1-7, sensitive to active chlorophyll) + # WRONG: Do NOT use (NIR-Red)/(NIR+Red) - that is NDVI, ranges -1 to 1, different scale CI <- nir_band / green_band - 1 names(CI) <- "CI" @@ -294,7 +281,6 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) { # Write output files terra::writeRaster(output_raster, new_file, overwrite = TRUE) - terra::vrt(new_file, vrt_file, overwrite = TRUE) # Check if the result has enough valid pixels valid_pixels <- terra::global(output_raster$CI, "notNA", na.rm=TRUE) @@ -738,7 +724,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir, grid_size = grid_size ) @@ -809,7 +794,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir, grid_size = grid_size ) @@ -850,7 +834,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, #' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for this tile, or NULL if processing failed #' -process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) { +process_single_tile <- function(tile_file, field_boundaries_sf, date, grid_size = NA) { tryCatch({ tile_filename <- 
basename(tile_file) safe_log(paste(" [TILE] Loading:", tile_filename)) @@ -892,25 +876,10 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band) names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI") - # Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename - # This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/ - if (!is.na(grid_size)) { - date_dir <- file.path(merged_final_tif_dir, grid_size, date) - } else { - date_dir <- file.path(merged_final_tif_dir, date) - } - - if (!dir.exists(date_dir)) { - dir.create(date_dir, recursive = TRUE, showWarnings = FALSE) - } - - # Use same filename as source tile (e.g., 2026-01-02_01.tif) - tile_filename <- basename(tile_file) - output_file <- file.path(date_dir, tile_filename) - - # Write processed tile - terra::writeRaster(output_raster, output_file, overwrite = TRUE) - safe_log(paste(" [SAVED TIFF] Output:", file.path(date, basename(output_file)), "(5 bands: Red, Green, Blue, NIR, CI)")) + # NOTE: Do NOT save processed tile - it's an intermediate only + # The purpose is to calculate field-level CI statistics, not to create permanent tile files + # This prevents bloat in merged_final_tif/ directory (would unnecessarily duplicate + # daily_tiles_split data with an extra CI band added) # Extract statistics per field from CI band field_bbox <- sf::st_bbox(field_boundaries_sf) @@ -951,7 +920,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin #' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for the date #' -extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) { +extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, grid_size 
= NA) { if (!inherits(field_boundaries_sf, "sf")) { field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf) @@ -970,7 +939,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C stats_list <- tryCatch({ furrr::future_map( tile_files, - ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size), + ~ process_single_tile(.x, field_boundaries_sf, date, grid_size = grid_size), .progress = FALSE, .options = furrr::furrr_options(seed = TRUE) ) @@ -980,7 +949,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C lapply( tile_files, function(tile_file) { - process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size) + process_single_tile(tile_file, field_boundaries_sf, date, grid_size = grid_size) } ) }) @@ -1013,3 +982,208 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C return(aggregated) } + +# ============================================================================= +# Script 20 (Per-Field) Specific Functions +# ============================================================================= + +#' Calculate Canopy Index (CI) from 4-band raster +#' +#' *** CRITICAL FORMULA: CI = NIR / Green - 1 *** +#' This is the CHLOROPHYLL INDEX formula (ranges ~1-7 for vegetation). +#' NOT NDVI! Do NOT use (NIR-Red)/(NIR+Red) - that produces -1 to 1 range. +#' +#' Expects band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4) +#' +#' @param raster_obj Loaded raster object with at least 4 bands +#' @return Raster object containing CI values (Chlorophyll Index, ranges ~1-7) +#' +calc_ci_from_raster <- function(raster_obj) { + # Expected band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4) + if (terra::nlyr(raster_obj) < 4) { + stop("Raster has fewer than 4 bands. 
Cannot calculate CI.") + } + + green <- terra::subset(raster_obj, 2) # Green band (required for proper CI calculation) + nir <- terra::subset(raster_obj, 4) # NIR + + # *** CHLOROPHYLL INDEX = NIR / Green - 1 *** + # This formula is sensitive to active chlorophyll content and ranges ~1-7 + # DO NOT use (NIR-Red)/(NIR+Red) - that is NDVI (Normalized Difference Vegetation Index) + # NDVI ranges -1 to 1 and is different from Chlorophyll Index + ci <- nir / green - 1 + + return(ci) +} + +#' Extract CI statistics by sub_field from a CI raster +#' +#' For a given field, masks the CI raster to each sub_field polygon +#' and calculates statistics (mean, median, sd, min, max, count). +#' +#' @param ci_raster Raster object containing CI values +#' @param field_boundaries_sf SF object containing field/sub_field polygons +#' @param field_name Character string of the field name to process +#' @return Data frame with field, sub_field, and CI statistics; NULL if field not found +#' +extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) { + # NOTE: Per-field TIFFs are already cropped to field boundaries by Script 10 + # No need to mask again - just extract all valid pixels from the raster + + # Extract ALL CI values (no masking needed for pre-cropped per-field TIFFs) + ci_values <- terra::values(ci_raster, na.rm = TRUE) + + if (length(ci_values) > 0) { + result_row <- data.frame( + field = field_name, + sub_field = field_name, # Use field_name as sub_field since TIFF is already field-specific + ci_mean = mean(ci_values, na.rm = TRUE), + ci_median = median(ci_values, na.rm = TRUE), + ci_sd = sd(ci_values, na.rm = TRUE), + ci_min = min(ci_values, na.rm = TRUE), + ci_max = max(ci_values, na.rm = TRUE), + ci_count = length(ci_values), + stringsAsFactors = FALSE + ) + } else { + result_row <- data.frame( + field = field_name, + sub_field = field_name, + ci_mean = NA_real_, + ci_median = NA_real_, + ci_sd = NA_real_, + ci_min = NA_real_, + ci_max = NA_real_, + 
ci_count = 0, + stringsAsFactors = FALSE + ) + } + + return(result_row) +} + +#' Extract RDS from existing CI TIFF (Migration/Regeneration Mode) +#' +#' This function extracts CI statistics from an already-calculated CI TIFF +#' without needing to recalculate from raw 4-band imagery. +#' Used during migration when field_tiles_CI/ exists but daily_vals/{FIELD}/ is missing. +#' +#' @param ci_tiff_path Path to the 5-band TIFF containing CI as the 5th band +#' @param output_rds_path Path where to save the output RDS file +#' @param field_boundaries_sf SF object with field/sub_field polygons +#' @param field_name Name of the field to extract +#' @return The RDS data frame (invisibly) and saves to disk +#' +extract_rds_from_ci_tiff <- function(ci_tiff_path, output_rds_path, field_boundaries_sf, field_name) { + tryCatch({ + # Load the 5-band TIFF + raster_5band <- terra::rast(ci_tiff_path) + + # Extract CI (5th band) + # Assuming structure: [1]=R, [2]=G, [3]=B, [4]=NIR, [5]=CI + ci_raster <- raster_5band[[5]] + + # Extract CI statistics by sub_field + ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field_name) + + # Save RDS + if (!is.null(ci_stats) && nrow(ci_stats) > 0) { + saveRDS(ci_stats, output_rds_path) + return(invisible(ci_stats)) + } else { + safe_log(sprintf("No CI statistics extracted from %s", ci_tiff_path), "WARNING") + return(invisible(NULL)) + } + + }, error = function(e) { + safe_log(sprintf("Error extracting RDS from CI TIFF: %s", e$message), "ERROR") + return(invisible(NULL)) + }) +} + +#' Regenerate ALL missing RDS files from existing CI TIFFs (Comprehensive Migration Mode) +#' +#' This function processes ALL dates in field_tiles_CI/ and extracts RDS for any missing daily_vals/ +#' files. No date window filtering - processes the entire historical archive. +#' +#' Used for one-time migration of old projects where field_tiles_CI/ is populated but daily_vals/ +#' RDS files are missing or incomplete. 
+#' +#' @param field_tiles_ci_dir Path to field_tiles_CI/ (input: pre-calculated CI TIFFs) +#' @param daily_vals_dir Path to daily_vals/ (output: RDS statistics files) +#' @param field_boundaries_sf SF object with field/sub_field polygons +#' @param fields Vector of field names to process +#' +#' @return List with summary stats: list(total_processed=N, total_skipped=M, total_errors=K) +#' +regenerate_all_missing_rds <- function(field_tiles_ci_dir, daily_vals_dir, field_boundaries_sf, fields) { + safe_log("\n========================================") + safe_log("MIGRATION MODE: REGENERATING ALL MISSING RDS") + safe_log("Processing ALL dates in field_tiles_CI/") + safe_log("========================================") + + total_processed <- 0 + total_skipped <- 0 + total_errors <- 0 + + # Iterate through each field + for (field in fields) { + field_ci_path <- file.path(field_tiles_ci_dir, field) + field_daily_vals_path <- file.path(daily_vals_dir, field) + + # Skip if field directory doesn't exist + if (!dir.exists(field_ci_path)) { + safe_log(sprintf(" Field %s: field_tiles_CI not found (skipping)", field)) + next + } + + # Create output directory for RDS + dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE) + + # Find ALL CI TIFFs for this field (no date filtering) + ci_tiff_files <- list.files( + path = field_ci_path, + pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$", + full.names = TRUE + ) + + if (length(ci_tiff_files) == 0) { + safe_log(sprintf(" Field %s: No CI TIFFs found (skipping)", field)) + next + } + + safe_log(sprintf(" Field %s: Found %d CI TIFFs to process", field, length(ci_tiff_files))) + + # Process each CI TIFF + for (ci_tiff in ci_tiff_files) { + date_str <- tools::file_path_sans_ext(basename(ci_tiff)) + output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) + + # Skip if RDS already exists + if (file.exists(output_rds)) { + total_skipped <- total_skipped + 1 + next + } + + # Extract RDS from CI TIFF + 
tryCatch({ + extract_rds_from_ci_tiff(ci_tiff, output_rds, field_boundaries_sf, field) + safe_log(sprintf(" %s: ✓ RDS extracted", date_str)) + total_processed <- total_processed + 1 + }, error = function(e) { + safe_log(sprintf(" %s: ✗ Error - %s", date_str, e$message), "ERROR") + total_errors <<- total_errors + 1 + }) + } + } + + safe_log(sprintf("\nMigration complete: processed %d, skipped %d, errors %d", + total_processed, total_skipped, total_errors)) + + return(list( + total_processed = total_processed, + total_skipped = total_skipped, + total_errors = total_errors + )) +} + diff --git a/r_app/21_convert_ci_rds_to_csv.R b/r_app/21_convert_ci_rds_to_csv.R index f75f6af..78af1bb 100644 --- a/r_app/21_convert_ci_rds_to_csv.R +++ b/r_app/21_convert_ci_rds_to_csv.R @@ -1,22 +1,62 @@ -# 02b_CONVERT_CI_RDS_TO_CSV.R -# ============================ -# Convert combined_CI_data.rds (output of script 02) to CSV format for Python -# This script runs AFTER script 02 (CI extraction) and creates a CSV that Python -# can use for harvest date detection WITHOUT requiring the 'model' column (which -# comes from script 03 after interpolation and harvest dates are known). +# ============================================================================ +# SCRIPT 21: Convert CI RDS to CSV (Python Compatibility Format) +# ============================================================================ +# PURPOSE: +# Convert consolidated CI data from R's wide format (RDS) to Python-compatible +# long format (CSV). Prepares per-field CI time series for harvest detection +# models and Python ML workflows without requiring interpolated/modeled values. 
# -# Usage: Rscript 02b_convert_ci_rds_to_csv.R [project_dir] -# - project_dir: Project directory name (e.g., "esa", "chemba", "angata") +# INPUT DATA: +# - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# - Format: RDS (interpolated growth model data from Script 30) +# - Requirement: Script 30 must have completed growth model interpolation # -# Output: CSV file at laravel_app/storage/app/{project_dir}/Data/extracted_ci/cumulative_vals/ci_data_for_python.csv -# Columns: field, sub_field, Date, FitData, DOY, value (alias for FitData) +# OUTPUT DATA: +# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ +# - Format: CSV (long format) +# - Columns: field, sub_field, Date, FitData, DOY, value # +# USAGE: +# Rscript 21_convert_ci_rds_to_csv.R [project] +# +# Example (Windows PowerShell): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata +# +# PARAMETERS: +# - project: Project name (character) - angata, chemba, xinavane, esa, simba +# +# CLIENT TYPES: +# - cane_supply (ANGATA): Yes - data export +# - agronomic_support (AURA): Yes - Python ML integration +# +# DEPENDENCIES: +# - Packages: tidyverse, lubridate, zoo, here +# - Utils files: parameters_project.R +# - Input data: All_pivots_Cumulative_CI_quadrant_year_v2.rds from Script 30 (growth model output) +# - Data directories: extracted_ci/cumulative_vals/ +# +# NOTES: +# - Data source: Uses interpolated CI data from Script 30 (growth model output) +# - Handles both wide format and long format inputs from growth model +# - DOY (Day of Year): Calculated from date for seasonal analysis +# - Python integration: CSV format compatible with pandas/scikit-learn workflows +# - Used by: Python harvest detection models (harvest_date_prediction.py) +# - Exports complete growth curves with interpolated values for ML training +# +# RELATED ISSUES: +# SC-112: Utilities restructuring +# SC-108: Core pipeline improvements +# +# 
============================================================================ suppressPackageStartupMessages({ - library(tidyverse) - library(lubridate) - library(zoo) - library(here) + # File path handling + library(here) # For relative path resolution (platform-independent file paths) + + # Data manipulation + library(tidyverse) # For dplyr, readr (data wrangling and CSV I/O) + library(lubridate) # For date/time operations (DOY calculation) + library(zoo) # For zoo objects (gap filling, rolling operations) }) # ============================================================================ @@ -140,7 +180,7 @@ main <- function() { cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir)) - # Initialize project configuration + # Initialize project configuration and centralized paths tryCatch({ source("parameters_project.R") }, error = function(e) { @@ -152,49 +192,63 @@ main <- function() { }) }) - # Define paths - ci_data_source_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals") - ci_data_output_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "ci_data_for_python") + # Load centralized path structure (creates all directories automatically) + paths <- setup_project_directories(project_dir) - # Create output directory if it doesn't exist (for new projects) - if (!dir.exists(ci_data_output_dir)) { - dir.create(ci_data_output_dir, recursive = TRUE, showWarnings = FALSE) - cat(sprintf("✓ Created output directory: %s\n", ci_data_output_dir)) - } + # Use centralized paths (no need for dir.create - already handled) + ci_data_source_dir <- paths$cumulative_ci_vals_dir + ci_data_output_dir <- paths$ci_for_python_dir - input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds") + # Try to load interpolated growth model data from Script 30 + input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds") output_file <- 
file.path(ci_data_output_dir, "ci_data_for_python.csv") # Check if input file exists if (!file.exists(input_file)) { - stop(paste("Input file not found:", input_file)) + stop(paste("Input file not found:", input_file, + "\nScript 30 (growth model) must be run before Script 21.")) } cat(sprintf("Loading: %s\n", input_file)) - # Load RDS file - ci_data_wide <- readRDS(input_file) %>% + # Load RDS file (from Script 30 - already in long format with interpolated values) + ci_data <- readRDS(input_file) %>% as_tibble() - cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide))) - cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n")) - cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", "))) + cat(sprintf(" Loaded %d rows\n", nrow(ci_data))) + cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", "))) - # Step 1: Convert from WIDE to LONG format - cat("\nStep 1: Converting from wide to long format...\n") - ci_data_long <- wide_to_long_ci_data(ci_data_wide) + # Check format and prepare for export + # If it's already in long format (from Script 30), use as-is + # Otherwise, convert from wide to long + if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) { + cat(" Detected: LONG format (from growth model)\n") + ci_data_long <- ci_data + } else { + cat(" Detected: WIDE format - converting to long...\n") + ci_data_long <- wide_to_long_ci_data(ci_data) + } - # Step 2: Create complete daily sequences with interpolation - cat("\nStep 2: Creating complete daily sequences with interpolation...\n") - ci_data_python <- create_interpolated_daily_sequences(ci_data_long) + # Step 1: Ensure Date column exists and is properly formatted + ci_data_long <- ci_data_long %>% + mutate( + Date = as.Date(Date) + ) - # Step 3: Validate output - cat("\nStep 3: Validating output...") - validate_conversion_output(ci_data_python) + # Step 2: If interpolated values already present, use them; otherwise create interpolated sequences + if 
("value" %in% names(ci_data_long)) { + # Already has interpolated values from Script 30 + cat("\nStep 2: Using interpolated values from growth model...\n") + ci_data_python <- ci_data_long + } else { + # Create interpolated daily sequences + cat("\nStep 2: Creating complete daily sequences with interpolation...\n") + ci_data_python <- create_interpolated_daily_sequences(ci_data_long) + } # Step 4: Save to CSV - cat(sprintf("\nStep 4: Saving to CSV...\n")) - cat(sprintf(" Output: %s\n", output_file)) + cat(sprintf("\nStep 4: Saving to CSV...\n")) + cat(sprintf(" Output: %s\n", output_file)) write_csv(ci_data_python, output_file) cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python))) diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index 5281c02..647b811 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -4,57 +4,143 @@ # =================== # Utility functions for growth model interpolation and manipulation. # These functions support the creation of continuous growth models from point measurements. 
+# +# PERFORMANCE OPTIMIZATION: +# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr() +# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields) +# - Dynamic CPU detection: Allocates workers based on available cores +# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support -#' Safe logging function that works whether log_message exists or not +#' Load and prepare the combined CI data (Per-Field Architecture) +#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges) +#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr() #' -#' @param message The message to log -#' @param level The log level (default: "INFO") -#' @return NULL (used for side effects) +#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals) +#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup) +#' @return Long-format dataframe with CI values by date and field #' -safe_log <- function(message, level = "INFO") { - if (exists("log_message")) { - log_message(message, level) - } else { - if (level %in% c("ERROR", "WARNING")) { - warning(message) - } else { - message(message) +load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) { + # For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals + # Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds + + if (!dir.exists(daily_vals_dir)) { + stop(paste("Daily values directory not found:", daily_vals_dir)) + } + + safe_log(paste("Loading per-field CI data from:", daily_vals_dir)) + + # OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates + date_filter_min <- NULL + date_filter_max <- NULL + if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) { + date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE) + date_filter_max <- 
max(harvesting_data$season_end, na.rm = TRUE) + safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s", + format(date_filter_min, "%Y-%m-%d"), + format(date_filter_max, "%Y-%m-%d"))) + } + + # Find all daily RDS files recursively (per-field structure) + # IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories + all_daily_files <- list.files( + path = daily_vals_dir, + pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$", # Only YYYY-MM-DD.rds format + full.names = TRUE, + recursive = TRUE + ) + + # Further filter: only keep files that are in a subdirectory (per-field structure) + # Exclude legacy files at the root level like "extracted_2024-02-29_whole_field.rds" + all_daily_files <- all_daily_files[basename(dirname(all_daily_files)) != "daily_vals"] + + if (length(all_daily_files) == 0) { + stop(paste("No per-field daily RDS files found in:", daily_vals_dir)) + } + + safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files))) + + # OPTIMIZATION: Filter files by filename date BEFORE parallel loading + # Skip files outside harvest season (can save 60-80% of I/O on large datasets) + if (!is.null(date_filter_min) && !is.null(date_filter_max)) { + all_daily_files <- all_daily_files[ + { + dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d") + !is.na(dates) & dates >= date_filter_min & dates <= date_filter_max + } + ] + safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files))) + } + + # Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking) + # Automatically detect available cores and limit to reasonable number + n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after) + future::plan(strategy = future::multisession, workers = n_cores) + safe_log(sprintf("Using %d parallel workers for file I/O", n_cores)) + + # Parallel file reading: 
future_map_dfr processes each file in parallel + # Returns combined dataframe directly (no need to rbind) + combined_long <- furrr::future_map_dfr( + all_daily_files, + .progress = TRUE, + .options = furrr::furrr_options(seed = TRUE), + function(file) { + # Extract date from filename: {YYYY-MM-DD}.rds + filename <- basename(file) + date_str <- tools::file_path_sans_ext(filename) + + # Parse date + if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) { + parsed_date <- as.Date(date_str, format = "%Y-%m-%d") + } else { + return(data.frame()) # Return empty dataframe if parse fails + } + + if (is.na(parsed_date)) { + return(data.frame()) + } + + # Read RDS file + tryCatch({ + rds_data <- readRDS(file) + + if (is.null(rds_data) || nrow(rds_data) == 0) { + return(data.frame()) + } + + # Add date column to the data + rds_data %>% + dplyr::mutate(Date = parsed_date) + + }, error = function(e) { + return(data.frame()) # Return empty dataframe on error + }) } - } -} - -#' Load and prepare the combined CI data -#' -#' @param data_dir Directory containing the combined CI data -#' @return Long-format dataframe with CI values by date -#' -load_combined_ci_data <- function(data_dir) { - file_path <- here::here(data_dir, "combined_CI_data.rds") + ) - if (!file.exists(file_path)) { - stop(paste("Combined CI data file not found:", file_path)) + # Return to sequential processing to avoid nested parallelism + future::plan(future::sequential) + + if (nrow(combined_long) == 0) { + safe_log("Warning: No valid CI data loaded from daily files", "WARNING") + return(data.frame()) } - safe_log(paste("Loading CI data from:", file_path)) + # OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets) + # Reshape to long format using ci_mean as the main CI value + DT <- data.table::as.data.table(combined_long) + DT <- DT[, .(field, sub_field, ci_mean, Date)] + DT[, c("value") := list(as.numeric(ci_mean))] + DT[, ci_mean := NULL] - # Load and 
transform the data to long format - pivot_stats <- readRDS(file_path) %>% - dplyr::ungroup() %>% - dplyr::group_by(field, sub_field) %>% - dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop") + # Fast filtering without .distinct() (which is slow on large datasets) + # Keep rows where Date is valid, field/sub_field exist, and value is finite + DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)] - pivot_stats_long <- pivot_stats %>% - tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>% - dplyr::mutate( - Date = lubridate::ymd(Date), - value = as.numeric(value) - ) %>% - tidyr::drop_na(c("value", "Date")) %>% - dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names - dplyr::filter(!is.infinite(value)) %>% - dplyr::distinct() + # Convert back to tibble for compatibility with rest of pipeline + pivot_stats_long <- dplyr::as_tibble(DT) - safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points")) + safe_log(sprintf("Loaded %d CI data points from %d daily files", + nrow(pivot_stats_long), length(all_daily_files))) return(pivot_stats_long) } @@ -65,15 +151,16 @@ load_combined_ci_data <- function(data_dir) { #' @param harvesting_data Dataframe with harvesting information #' @param field_CI_data Dataframe with CI measurements #' @param season Year of the growing season +#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration. 
#' @return Dataframe with interpolated daily CI values #' -extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) { +extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) { # Filter harvesting data for the given season and field name filtered_harvesting_data <- harvesting_data %>% dplyr::filter(year == season, sub_field == field_name) if (nrow(filtered_harvesting_data) == 0) { - safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") + if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } @@ -83,7 +170,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) # Return an empty data frame if no CI data is found if (nrow(filtered_field_CI_data) == 0) { - safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") + if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } @@ -107,12 +194,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) # If CI is empty after filtering, return an empty dataframe if (nrow(CI) == 0) { - safe_log(paste0("No CI data within season dates for field: ", field_name, - " (Season: ", season, ", dates: ", - format(season_start, "%Y-%m-%d"), " to ", - format(season_end, "%Y-%m-%d"), - "). Available CI data range: ", ci_date_range), - "WARNING") + if (verbose) { + safe_log(paste0("No CI data within season dates for field: ", field_name, + " (Season: ", season, ", dates: ", + format(season_start, "%Y-%m-%d"), " to ", + format(season_end, "%Y-%m-%d"), + "). 
Available CI data range: ", ci_date_range), + "WARNING") + } return(data.frame()) } @@ -125,25 +214,23 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) subField = field_name ) - # Log successful interpolation - safe_log(paste0("Successfully interpolated CI data for field: ", field_name, - " (Season: ", season, ", dates: ", - format(season_start, "%Y-%m-%d"), " to ", - format(season_end, "%Y-%m-%d"), - "). ", nrow(CI), " data points created.")) - + # Return data with success status return(CI) }, error = function(e) { - safe_log(paste0("Error interpolating CI data for field ", field_name, - " in season ", season, - " (", format(season_start, "%Y-%m-%d"), " to ", - format(season_end, "%Y-%m-%d"), - "): ", e$message), "ERROR") + # Return empty dataframe on error (will be tracked separately) + if (verbose) { + safe_log(paste0("Error interpolating CI data for field ", field_name, + " in season ", season, + " (", format(season_start, "%Y-%m-%d"), " to ", + format(season_end, "%Y-%m-%d"), + "): ", e$message), "ERROR") + } return(data.frame()) }) } #' Generate interpolated CI data for all fields and seasons +#' PARALLELIZE: Processes fields in parallel using furrr::future_map_df() #' #' @param years Vector of years to process #' @param harvesting_data Dataframe with harvesting information @@ -153,17 +240,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { safe_log("Starting CI data interpolation for all fields") + # Track failed fields for end-of-run summary + failed_fields <- list() + total_fields <- 0 + successful_fields <- 0 + # Process each year result <- purrr::map_df(years, function(yr) { - safe_log(paste("Processing year:", yr)) - # Get the fields harvested in this year with valid season start dates sub_fields <- harvesting_data %>% dplyr::filter(year == yr, !is.na(season_start)) %>% dplyr::pull(sub_field) if 
(length(sub_fields) == 0) { - safe_log(paste("No fields with valid season data for year:", yr), "WARNING") return(data.frame()) } @@ -172,24 +261,74 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { purrr::keep(~ any(ci_data$sub_field == .x)) if (length(valid_sub_fields) == 0) { - safe_log(paste("No fields with CI data for year:", yr), "WARNING") return(data.frame()) } - # Extract and interpolate data for each valid field - safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr)) + total_fields <<- total_fields + length(valid_sub_fields) + safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields))) - result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x, - harvesting_data = harvesting_data, - field_CI_data = ci_data, - season = yr)) %>% - purrr::list_rbind() + # Set up parallel future plan for field interpolation + # Allocate 1 core per ~100 fields (with minimum 2 cores) + n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100))) + future::plan(strategy = future::multisession, workers = n_cores) - safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr)) - return(result) + # PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently) + result_list <- furrr::future_map( + valid_sub_fields, + .progress = TRUE, + .options = furrr::furrr_options(seed = TRUE), + function(field) { + # Call with verbose=FALSE to suppress warnings during parallel iteration + extract_CI_data(field, + harvesting_data = harvesting_data, + field_CI_data = ci_data, + season = yr, + verbose = FALSE) + } + ) + + # Return to sequential processing + future::plan(future::sequential) + + # Process results and tracking + for (i in seq_along(result_list)) { + field_result <- result_list[[i]] + field_name <- valid_sub_fields[i] + + if (nrow(field_result) > 0) { + successful_fields <<- successful_fields + 1 + } else { + 
failed_fields[[length(failed_fields) + 1]] <<- list( + field = field_name, + season = yr, + reason = "Unable to generate interpolated data" + ) + } + } + + # Combine all results for this year + result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty + if (length(result_list) > 0) { + purrr::list_rbind(result_list) + } else { + data.frame() + } }) - safe_log(paste("Total interpolated data points:", nrow(result))) + # Print summary + safe_log(sprintf("\n=== Interpolation Summary ===")) + safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields)) + + if (length(failed_fields) > 0) { + safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields))) + for (failure in failed_fields) { + safe_log(sprintf(" - Field %s (Season %d): %s", + failure$field, failure$season, failure$reason), "WARNING") + } + } + + safe_log(sprintf("Total interpolated data points: %d", nrow(result))) + return(result) } @@ -222,11 +361,19 @@ calculate_growth_metrics <- function(interpolated_data) { #' @return Path to the saved file #' save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") { + # Validate input + if (is.null(output_dir) || !is.character(output_dir) || length(output_dir) == 0) { + stop("output_dir must be a non-empty character string") + } + + # Normalize path separators for Windows compatibility + output_dir <- normalizePath(output_dir, winslash = "/", mustWork = FALSE) + # Create output directory if it doesn't exist dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - # Create full file path - file_path <- here::here(output_dir, file_name) + # Create full file path using file.path (more robust than here::here for absolute paths) + file_path <- file.path(output_dir, file_name) # Save the data saveRDS(data, file_path) diff --git a/r_app/30_interpolate_growth_model.R b/r_app/30_interpolate_growth_model.R index ed310e5..db42646 100644 --- 
a/r_app/30_interpolate_growth_model.R +++ b/r_app/30_interpolate_growth_model.R @@ -1,71 +1,142 @@ -# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\interpolate_growth_model.R +# ============================================================================ +# SCRIPT 30: Growth Model Interpolation (CI Time Series) +# ============================================================================ +# PURPOSE: +# Interpolate Canopy Index (CI) values across time to create continuous +# growth curves. Fills gaps in measurement dates, applies smoothing via +# LOESS, and generates daily CI estimates and cumulative statistics for +# each field. Enables downstream yield prediction and trend analysis. # -# INTERPOLATE_GROWTH_MODEL.R -# ========================= -# This script interpolates CI (Chlorophyll Index) values between measurement dates -# to create a continuous growth model. It generates daily values and cumulative -# CI statistics for each field. 
+# INPUT DATA: +# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds +# - Format: RDS (wide format: fields × dates with CI values) +# - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx) # -# Usage: Rscript interpolate_growth_model.R [project_dir] -# - project_dir: Project directory name (e.g., "chemba") -# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata +# OUTPUT DATA: +# - Destination: laravel_app/storage/app/{project}/interpolated_ci/ +# - Format: RDS files per field (daily CI estimates) +# - Also exports: Growth model curves as RDS (cumulative CI, daily values) +# +# USAGE: +# Rscript 30_interpolate_growth_model.R [project] +# +# Example (Windows PowerShell): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata +# +# PARAMETERS: +# - project: Project name (character) - angata, chemba, xinavane, esa, simba +# +# CLIENT TYPES: +# - cane_supply (ANGATA): Yes - core growth monitoring +# - agronomic_support (AURA): Yes - field health trend analysis +# +# DEPENDENCIES: +# - Packages: tidyverse, lubridate +# - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R +# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx) +# - Input data: combined_CI_data.rds from Script 20 +# - Data directories: interpolated_ci/ (created if missing) +# +# NOTES: +# - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends) +# - Gap-filling: Assumes continuous growth between measurements; skips clouds +# - Cumulative CI: Sum of daily interpolated values from planting to current date +# - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting) +# - Critical for 8-week CV trend calculation and 4-week growth categorization +# +# RELATED ISSUES: +# SC-112: Utilities restructuring +# SC-108: Core pipeline improvements +# +# 
============================================================================ # 1. Load required packages # ----------------------- suppressPackageStartupMessages({ - library(tidyverse) - library(lubridate) - library(here) + # File path handling + library(here) # For relative path resolution (platform-independent file paths) + + # Data manipulation + library(tidyverse) # For dplyr (data wrangling, grouping, mutating) + library(lubridate) # For date/time operations (date arithmetic, ISO week extraction) + library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases) + + # Parallel processing (Windows PSOCK + Mac/Linux fork-safe) + library(future) # For setting up parallel execution plans + library(furrr) # For future_map_dfr (parallel file I/O and field processing) + library(parallel) # For detectCores (automatic CPU detection) + library(data.table) # For fast filtering on large datasets }) -# 2. Main function to handle interpolation -# ------------------------------------- +# ============================================================================= +# MAIN PROCESSING FUNCTION +# ============================================================================= + main <- function() { - # Process command line arguments - args <- commandArgs(trailingOnly = TRUE) - - # Get project directory from arguments or use default - if (length(args) >= 1 && !is.na(args[1])) { - project_dir <- as.character(args[1]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - project_dir <- "esa" - message("No project_dir provided. 
Using default:", project_dir) + # STEP 1: Set working directory to project root (smartcane/) + # This ensures all relative paths resolve correctly + if (basename(getwd()) == "r_app") { + setwd("..") } - # Make project_dir available globally so parameters_project.R can use it + # STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations) + # Parse command-line arguments FIRST + args <- commandArgs(trailingOnly = TRUE) + project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata" + + # Make project_dir available globally for parameters_project.R assign("project_dir", project_dir, envir = .GlobalEnv) - # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction) - ci_extraction_script <- TRUE - assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv) - - # Initialize project configuration and load utility functions + # Load parameters_project.R (provides setup_project_directories, etc.) tryCatch({ - source("parameters_project.R") - source("30_growth_model_utils.R") + source("r_app/parameters_project.R") }, error = function(e) { - warning("Default source files not found. 
Attempting to source from 'r_app' directory.") - tryCatch({ - source(here::here("r_app", "parameters_project.R")) - source(here::here("r_app", "30_growth_model_utils.R")) - warning(paste("Successfully sourced files from 'r_app' directory.")) - - }, error = function(e) { - stop("Failed to source required files from both default and 'r_app' directories.") - }) + cat(sprintf("Error loading parameters_project.R: %s\n", e$message)) + stop(e) }) - log_message("Starting CI growth model interpolation") + # Load growth model utilities + tryCatch({ + source("r_app/30_growth_model_utils.R") + }, error = function(e) { + cat(sprintf("Error loading 30_growth_model_utils.R: %s\n", e$message)) + stop(e) + }) + + # STEP 3: Now all utilities are loaded, proceed with script logic + safe_log(sprintf("=== Script 30: Growth Model Interpolation ===")) + safe_log(sprintf("Project: %s", project_dir)) + + # Set up directory paths from parameters + setup <- setup_project_directories(project_dir) + + # For per-field architecture: read from daily_ci_vals_dir (Script 20 per-field output) + daily_vals_dir <- setup$daily_ci_vals_dir + safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir)) + + safe_log("Starting CI growth model interpolation") + + # Set up data directory paths + data_dir <- setup$data_dir # Load and process the data tryCatch({ - # Load the combined CI data - CI_data <- load_combined_ci_data(cumulative_CI_vals_dir) + # Load the combined CI data (created by Script 20 per-field) + # Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds + # OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files) + harvesting_data <- tryCatch({ + load_harvesting_data(data_dir) + }, error = function(e) { + safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING") + NULL + }) + + # Load CI data with date range pre-filtering + CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data) # 
Validate harvesting data if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + safe_log("No harvesting data available", "ERROR") stop("No harvesting data available") } @@ -75,7 +146,7 @@ main <- function() { distinct(year) %>% pull(year) - log_message(paste("Processing data for years:", paste(years, collapse = ", "))) + safe_log(paste("Processing data for years:", paste(years, collapse = ", "))) # Generate interpolated CI data for each year and field CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data) @@ -89,20 +160,20 @@ main <- function() { # Add daily and cumulative metrics CI_all_with_metrics <- calculate_growth_metrics(CI_all) - # Save the processed data + # Save the processed data to cumulative_vals directory save_growth_model( CI_all_with_metrics, - cumulative_CI_vals_dir, + setup$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds" ) } else { - log_message("No CI data was generated after interpolation", level = "WARNING") + safe_log("No CI data was generated after interpolation", "WARNING") } - log_message("Growth model interpolation completed successfully") + safe_log("Growth model interpolation completed successfully") }, error = function(e) { - log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR") + safe_log(paste("Error in growth model interpolation:", e$message), "ERROR") stop(e$message) }) } diff --git a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R deleted file mode 100644 index 7efb281..0000000 --- a/r_app/40_mosaic_creation.R +++ /dev/null @@ -1,215 +0,0 @@ -# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\mosaic_creation.R -# -# MOSAIC_CREATION.R -# =============== -# This script creates weekly mosaics from daily satellite imagery. -# It handles command-line arguments and initiates the mosaic creation process. 
-# -# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size] -# - end_date: End date for processing (YYYY-MM-DD format) -# - offset: Number of days to look back from end_date -# - project_dir: Project directory name (e.g., "chemba") -# - file_name: Optional custom output file name -# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE) -# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE) -# -# Examples: - -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata -# - -# 1. Load required packages -# ----------------------- -suppressPackageStartupMessages({ - library(sf) - library(terra) - library(tidyverse) - library(lubridate) - library(here) -}) - -# 2. Process command line arguments and run mosaic creation -# ------------------------------------------------------ -main <- function() { - # Capture command line arguments - args <- commandArgs(trailingOnly = TRUE) - - # Process project_dir argument with default - if (length(args) >= 3 && !is.na(args[3])) { - project_dir <- as.character(args[3]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - # Default project directory - project_dir <- "angata" - message("No project_dir provided. Using default:", project_dir) - } - - # Make project_dir available globally so parameters_project.R can use it - assign("project_dir", project_dir, envir = .GlobalEnv) - - # Process end_date argument with default - if (length(args) >= 1 && !is.na(args[1])) { - end_date <- as.Date(args[1]) - if (is.na(end_date)) { - message("Invalid end_date provided. 
Using current date.") - end_date <- Sys.Date() - } - } else if (exists("end_date_str", envir = .GlobalEnv)) { - end_date <- as.Date(get("end_date_str", envir = .GlobalEnv)) - } else { - # Default to current date if no argument is provided - end_date <- Sys.Date() - message("No end_date provided. Using current date: ", format(end_date)) - } - - # Process offset argument with default - if (length(args) >= 2 && !is.na(args[2])) { - offset <- as.numeric(args[2]) - if (is.na(offset) || offset <= 0) { - message("Invalid offset provided. Using default (7 days).") - offset <- 7 - } - } else { - # Default to 7 days if no argument is provided - offset <- 7 - message("No offset provided. Using default:", offset, "days") - } - - # 3. Initialize project configuration - # -------------------------------- - - # Detect which data source directory exists (merged_tif or merged_tif_8b) - laravel_storage <- here::here("laravel_app/storage/app", project_dir) - data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) { - message("Detected data source: merged_tif_8b (8-band optimized)") - "merged_tif_8b" - } else if (dir.exists(file.path(laravel_storage, "merged_tif"))) { - message("Detected data source: merged_tif (legacy 4-band)") - "merged_tif" - } else { - message("Warning: No data source found. 
Using default: merged_tif_8b") - "merged_tif_8b" - } - - # Set global data_source for parameters_project.R - assign("data_source", data_source, envir = .GlobalEnv) - - tryCatch({ - source("parameters_project.R") - source("40_mosaic_creation_utils.R") - safe_log(paste("Successfully sourced files from default directory.")) - }, error = function(e) { - message("Note: Could not open files from default directory (expected on some systems)") - message("Attempting to source from 'r_app' directory instead...") - tryCatch({ - source(here::here("r_app", "parameters_project.R")) - source(here::here("r_app", "40_mosaic_creation_utils.R")) - message("✓ Successfully sourced files from 'r_app' directory") - }, error = function(e) { - stop("Failed to source required files from both default and 'r_app' directories.") - }) - }) - - # 4. Generate date range for processing - # --------------------------------- - dates <- date_list(end_date, offset) - safe_log(paste("Processing data for week", dates$week, "of", dates$year)) - - # Create output filename - file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) { - as.character(args[4]) - } else { - paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif") - } - - safe_log(paste("Output will be saved as:", file_name_tif)) - - # 5. 
Create weekly mosaics - route based on project tile detection - # --------------------------------------------------------------- - # The use_tile_mosaic flag is auto-detected by parameters_project.R - # based on whether tiles exist in merged_final_tif/ - - if (!exists("use_tile_mosaic")) { - # Fallback detection if flag not set (shouldn't happen) - merged_final_dir <- file.path(laravel_storage, "merged_final_tif") - tile_detection <- detect_mosaic_mode(merged_final_dir) - use_tile_mosaic <- tile_detection$has_tiles - } - - if (use_tile_mosaic) { - # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics - # This is used for projects like Angata with large ROIs requiring spatial partitioning - # Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20) - tryCatch({ - safe_log("Starting per-tile mosaic creation (tile-based approach)...") - - # Detect grid size from merged_final_tif folder structure - # Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc. 
- merged_final_base <- file.path(laravel_storage, "merged_final_tif") - grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE) - # Look for grid size patterns like "5x5", "10x10", "20x20" - grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) - - if (length(grid_patterns) == 0) { - stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)") - } - - grid_size <- grid_patterns[1] # Use first grid size found - safe_log(paste("Detected grid size:", grid_size)) - - # Point to the grid-specific merged_final_tif directory - merged_final_with_grid <- file.path(merged_final_base, grid_size) - - # Set output directory for per-tile mosaics, organized by grid size - # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif - tile_output_base <- file.path(laravel_storage, "weekly_tile_max", grid_size) - dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE) - - created_tile_files <- create_weekly_mosaic_from_tiles( - dates = dates, - merged_final_dir = merged_final_with_grid, - tile_output_dir = tile_output_base, - field_boundaries = field_boundaries - ) - - safe_log(paste("✓ Per-tile mosaic creation completed - created", - length(created_tile_files), "tile files")) - }, error = function(e) { - safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR") - traceback() - stop("Mosaic creation failed") - }) - - } else { - # SINGLE-FILE APPROACH: Create single weekly mosaic file - # This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output - tryCatch({ - safe_log("Starting single-file mosaic creation (backward-compatible approach)...") - - # Set output directory for single-file mosaics - single_file_output_dir <- file.path(laravel_storage, "weekly_mosaic") - - created_file <- create_weekly_mosaic( - dates = dates, - field_boundaries = field_boundaries, - daily_vrt_dir = daily_vrt, - merged_final_dir = merged_final, - output_dir = single_file_output_dir, - 
file_name_tif = file_name_tif, - create_plots = FALSE - ) - - safe_log(paste("✓ Single-file mosaic creation completed:", created_file)) - }, error = function(e) { - safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR") - traceback() - stop("Mosaic creation failed") - }) - } -} - -if (sys.nframe() == 0) { - main() -} - \ No newline at end of file diff --git a/r_app/40_mosaic_creation_per_field.R b/r_app/40_mosaic_creation_per_field.R new file mode 100644 index 0000000..f7342d3 --- /dev/null +++ b/r_app/40_mosaic_creation_per_field.R @@ -0,0 +1,209 @@ +# ============================================================================ +# SCRIPT 40: Weekly Mosaic Creation (Per-Field CI Aggregation) +# ============================================================================ +# PURPOSE: +# Aggregate daily per-field CI TIFFs into weekly mosaics. Handles multi-date +# merging, cloud masking, and produces 5-band weekly output for reporting +# and KPI calculations. Supports both per-field and grid-based tile architecture. 
+# +# INPUT DATA: +# - Source: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif +# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32) +# - Dates: All available dates within week range +# +# OUTPUT DATA: +# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/ +# - Format: GeoTIFF (5-band: R,G,B,NIR,CI) +# - Naming: week_WW_YYYY.tif (WW = ISO week, YYYY = ISO year) +# +# USAGE: +# Rscript 40_mosaic_creation_per_field.R [end_date] [offset] [project] +# +# Example (Windows PowerShell): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-01-12 7 angata +# +# PARAMETERS: +# - end_date: End date for processing (character, YYYY-MM-DD format, default today) +# - offset: Days to look back (numeric, default 7 for one week) +# - project: Project name (character) - angata, chemba, xinavane, esa, simba +# +# CLIENT TYPES: +# - cane_supply (ANGATA): Yes - weekly monitoring +# - agronomic_support (AURA): Yes - field health reporting +# +# DEPENDENCIES: +# - Packages: terra, sf, tidyverse, lubridate +# - Utils files: parameters_project.R, 40_mosaic_creation_per_field_utils.R +# - Input data: Daily per-field CI TIFFs from Script 20 +# - Data directories: field_tiles_CI/, weekly_mosaic/ +# +# NOTES: +# - Aggregation method: Maximum CI value per pixel across week (handles clouds) +# - ISO week-year used for consistent date handling across year boundaries +# - Supports both single-file and tiled mosaic architectures +# - Output feeds Scripts 80 (KPI calculations) and 90/91 (reporting) +# - Critical for trend analysis: week-over-week CI comparison +# +# RELATED ISSUES: +# SC-112: Script 40 cleanup (deleted legacy mosaic utils files) +# SC-108: Core pipeline improvements +# +# ============================================================================ + +# 1. 
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
  library(sf)
  library(terra)
  library(tidyverse)
  library(lubridate)
  library(here)
})

# 2. Main execution function
# -------------------------
# Orchestrates per-field weekly mosaic creation:
# parses CLI args -> loads config/utils -> validates input dirs ->
# builds the ISO-week date range -> delegates to create_all_field_weekly_mosaics().
# Returns (invisibly) the vector of created file paths.
main <- function() {

  cat("\n")
  cat("========================================================\n")
  cat(" Script 40: Per-Field Weekly Mosaic Creation\n")
  cat("========================================================\n\n")

  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # ==== Process Arguments ====

  # Project directory (arg 3); falls back to a pre-set global, then "angata"
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "angata"
    message("[INFO] No project_dir provided. Using default: angata")
  }

  # Exported globally so parameters_project.R can resolve project paths
  assign("project_dir", project_dir, envir = .GlobalEnv)
  message(paste("[INFO] Project:", project_dir))

  # End date (arg 1, YYYY-MM-DD); invalid or missing -> today
  if (length(args) >= 1 && !is.na(args[1])) {
    end_date <- as.Date(args[1], format = "%Y-%m-%d")
    if (is.na(end_date)) {
      message("[WARNING] Invalid end_date. Using current date.")
      end_date <- Sys.Date()
    }
  } else {
    end_date <- Sys.Date()
    message(paste("[INFO] No end_date provided. Using current date:", format(end_date)))
  }

  # Offset (arg 2, days back); non-positive or non-numeric -> 7
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      message("[WARNING] Invalid offset. Using default: 7 days")
      offset <- 7
    }
  } else {
    offset <- 7
    message("[INFO] No offset provided. Using default: 7 days")
  }

  # ==== Load Configuration ====

  # Normalize working directory: sources below assume the repository root
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  tryCatch({
    source("r_app/parameters_project.R")
    message("[INFO] ✓ Loaded parameters_project.R")
  }, error = function(e) {
    stop("[ERROR] Failed to load parameters_project.R: ", e$message)
  })

  tryCatch({
    source("r_app/40_mosaic_creation_per_field_utils.R")
    message("[INFO] ✓ Loaded 40_mosaic_creation_per_field_utils.R")
  }, error = function(e) {
    stop("[ERROR] Failed to load utilities: ", e$message)
  })

  # ==== Get Project Directories ====

  # NOTE(review): setup_project_directories() is presumably defined by
  # parameters_project.R — confirm it returns $field_tiles_ci_dir and
  # $laravel_storage_dir for every supported project.
  setup <- setup_project_directories(project_dir)

  # Determine input/output directories
  # Input: field_tiles_CI/ (from Script 20)
  field_tiles_ci_dir <- setup$field_tiles_ci_dir

  # Output: weekly_mosaic/ (for Scripts 90/91)
  weekly_mosaic_output_dir <- file.path(setup$laravel_storage_dir, "weekly_mosaic")

  message(paste("[INFO] Input directory:", field_tiles_ci_dir))
  message(paste("[INFO] Output directory:", weekly_mosaic_output_dir))

  # ==== Validate Input Directory ====

  # Fail fast with an actionable hint when upstream Script 20 has not run
  if (!dir.exists(field_tiles_ci_dir)) {
    stop(paste("[ERROR] Input directory not found:", field_tiles_ci_dir,
               "\nScript 20 (CI extraction) must be run first to create per-field TIFFs."))
  }

  # Check if directory has any TIFFs
  field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
  if (length(field_dirs) == 0) {
    stop(paste("[ERROR] No field subdirectories found in:", field_tiles_ci_dir))
  }

  message(paste("[INFO] Found", length(field_dirs), "field directories"))

  # ==== Generate Date Range ====

  # date_list() comes from 40_mosaic_creation_per_field_utils.R (sourced above)
  dates <- date_list(end_date, offset)

  # Validate week calculation
  message(sprintf("[INFO] Requested offset: %d days", offset))
  message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d")))
  message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d")))
  message(sprintf("[INFO] Calculating ISO week: %d", dates$week))
  message(sprintf("[INFO] Calculating ISO year: %d", dates$year))

  # ==== Create Per-Field Weekly Mosaics ====

  created_files <- create_all_field_weekly_mosaics(
    dates = dates,
    field_tiles_ci_dir = field_tiles_ci_dir,
    output_dir = weekly_mosaic_output_dir
  )

  # ==== Summary ====

  message("\n")
  message("========================================================")
  message(paste(" COMPLETED"))
  message(paste(" Created:", length(created_files), "weekly field mosaics"))
  message("========================================================\n")

  if (length(created_files) > 0) {
    message("[SUCCESS] Weekly mosaics ready for reporting (Scripts 90/91)")
  } else {
    message("[WARNING] No mosaics created - check input data")
  }

  return(invisible(created_files))
}

# Execute main if script is run directly (sys.nframe() == 0 means top level,
# i.e. not being source()'d by another script); exit status reflects outcome
if (sys.nframe() == 0) {
  tryCatch({
    created <- main()
    quit(save = "no", status = 0)
  }, error = function(e) {
    message(paste("\n[FATAL ERROR]", e$message))
    quit(save = "no", status = 1)
  })
}
# Fallback logger: the legacy 40_mosaic_creation_utils.R that defined
# safe_log() was deleted in this change, so guard against it being absent.
# The guard keeps any definition already provided (e.g. by
# parameters_project.R, which is sourced before this file).
if (!exists("safe_log")) {
  safe_log <- function(message, level = "INFO") {
    if (level %in% c("ERROR", "WARNING")) {
      warning(message)
    } else {
      message(message)
    }
  }
}

#' Generate date range for processing (ISO week-based)
#'
#' @param end_date The end date (Date object or YYYY-MM-DD string)
#' @param offset Number of days to look back from end_date (typically 7 for one week)
#' @return List with week, year, start_date, end_date, days_filter (vector of YYYY-MM-DD strings)
#'
date_list <- function(end_date, offset) {
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date. Expected Date object or YYYY-MM-DD string.")
    }
  }

  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset. Expected positive number.")
  }

  offset <- offset - 1  # Adjust so the range includes end_date itself
  start_date <- end_date - lubridate::days(offset)

  # The END date's ISO week/year labels the output (week_WW_YYYY.tif)
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)

  # Validate: warn when the requested range crosses an ISO week boundary
  start_week <- lubridate::isoweek(start_date)
  start_year <- lubridate::isoyear(start_date)

  if (start_week != week || start_year != year) {
    safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. Using END date week %d/%d.",
                     start_week, start_year, week, year, week, year), "WARNING")
  }

  days_filter <- seq(from = start_date, to = end_date, by = "day")
  days_filter <- format(days_filter, "%Y-%m-%d")

  safe_log(paste("Date range:", start_date, "to", end_date,
                 "(week", week, "of", year, ")"))

  return(list(
    week = week,
    year = year,
    start_date = start_date,
    end_date = end_date,
    days_filter = days_filter
  ))
}

#' Find all per-field daily TIFFs for a specific week
#'
#' @param field_tiles_ci_dir Base directory containing per-field TIFFs
#'   (e.g., field_tiles_CI/)
#' @param days_filter Vector of YYYY-MM-DD dates to match
#' @return Named list mapping field name -> sorted vector of matching TIFF paths
#'
find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) {

  if (!dir.exists(field_tiles_ci_dir)) {
    safe_log(paste("Field TIFFs directory not found:", field_tiles_ci_dir), "WARNING")
    return(list())
  }

  # List all field subdirectories
  field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)

  if (length(field_dirs) == 0) {
    safe_log("No field subdirectories found in field_tiles_CI/", "WARNING")
    return(list())
  }

  # Build the date alternation ONCE (it was rebuilt on every loop iteration),
  # and match it against the FILENAME only: matching the full path could
  # false-positive every file when a parent directory name contains a date.
  date_pattern <- paste(days_filter, collapse = "|")

  field_tiffs <- list()

  for (field in field_dirs) {
    field_path <- file.path(field_tiles_ci_dir, field)

    # Find all TIFF files in this field directory
    tiff_files <- list.files(field_path, pattern = "\\.tif$", full.names = TRUE)

    if (length(tiff_files) == 0) {
      next  # Skip fields with no TIFFs
    }

    # Keep only files whose name carries one of this week's dates
    matching_files <- tiff_files[grepl(date_pattern, basename(tiff_files))]

    if (length(matching_files) > 0) {
      field_tiffs[[field]] <- sort(matching_files)
    }
  }

  safe_log(paste("Found TIFFs for", length(field_tiffs), "fields in week"))

  return(field_tiffs)
}
#' Create weekly MAX composite for a single field
#'
#' Loads all daily TIFFs for a field+week combination and creates a MAX
#' composite (per-band maximum across all days).
#'
#' @param tiff_files Vector of TIFF file paths for this field+week
#' @param field_name Name of the field (for logging)
#' @return SpatRaster with 5 bands (R,G,B,NIR,CI), or NULL if fails
#'
create_field_weekly_composite <- function(tiff_files, field_name) {

  if (length(tiff_files) == 0) {
    return(NULL)
  }

  tryCatch({
    # Load all TIFFs. Unreadable files are skipped WITH a logged warning:
    # the previous empty error handler swallowed failures silently (and its
    # comment claimed they were "already counted", which nothing did),
    # hiding data loss from the weekly composite.
    rasters <- list()
    for (file in tiff_files) {
      tryCatch({
        r <- terra::rast(file)
        rasters[[length(rasters) + 1]] <- r
      }, error = function(e) {
        safe_log(paste("Field", field_name, "- skipping unreadable TIFF:",
                       basename(file), "-", e$message), "WARNING")
      })
    }

    if (length(rasters) == 0) {
      return(NULL)
    }

    # Create MAX composite (a single input needs no mosaic step)
    if (length(rasters) == 1) {
      composite <- rasters[[1]]
    } else {
      # Stack all rasters and apply MAX per pixel per band
      collection <- terra::sprc(rasters)
      composite <- terra::mosaic(collection, fun = "max")
    }

    # Ensure 5 bands with expected names
    if (terra::nlyr(composite) >= 5) {
      composite <- terra::subset(composite, 1:5)
      names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
    } else {
      # Fewer than 5 bands is unexpected for Script-20 output; keep the
      # composite (unchanged behavior) but flag it so downstream band-index
      # failures are traceable.
      safe_log(paste("Field", field_name, "- composite has only",
                     terra::nlyr(composite), "band(s), expected 5"), "WARNING")
    }

    return(composite)

  }, error = function(e) {
    safe_log(paste("Error creating composite for field", field_name, ":", e$message), "ERROR")
    return(NULL)
  })
}

#' Save per-field weekly mosaic
#'
#' @param raster SpatRaster to save
#' @param output_dir Base output directory (e.g., laravel_app/storage/app/{project}/weekly_mosaic/)
#' @param field_name Name of the field
#' @param week Week number (ISO week)
#' @param year Year (ISO year)
#' @return File path of saved TIFF, or NULL if fails
#'
save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year) {

  if (is.null(raster)) {
    return(NULL)
  }

  tryCatch({
    # Create field-specific output directory
    field_output_dir <- file.path(output_dir, field_name)
    dir.create(field_output_dir, recursive = TRUE, showWarnings = FALSE)

    # Generate filename: week_WW_YYYY.tif — this naming is the contract
    # consumed by Scripts 90/91; do not change it independently.
    filename <- sprintf("week_%02d_%04d.tif", week, year)
    file_path <- file.path(field_output_dir, filename)

    # Save raster (silently)
    terra::writeRaster(raster, file_path, overwrite = TRUE)

    return(file_path)

  }, error = function(e) {
    safe_log(paste("Error saving mosaic for field", field_name, ":", e$message), "ERROR")
    return(NULL)
  })
}
#' Create all weekly mosaics for all fields in a week
#'
#' Main orchestration function. Loops over all fields and creates weekly mosaics.
#'
#' @param dates List from date_list() - contains week, year, days_filter
#' @param field_tiles_ci_dir Input: field_tiles_CI/ directory
#' @param output_dir Output: weekly_mosaic/ directory
#' @return Vector of successfully created file paths
#'
create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_dir) {

  safe_log(paste("=== Creating Per-Field Weekly Mosaics ==="))
  safe_log(paste("Week:", dates$week, "Year:", dates$year))

  # Find all per-field TIFFs for this week
  field_tiffs <- find_per_field_tiffs_for_week(field_tiles_ci_dir, dates$days_filter)

  if (length(field_tiffs) == 0) {
    safe_log("No per-field TIFFs found for this week - returning empty list", "WARNING")
    return(character())
  }

  safe_log(paste("Processing", length(field_tiffs), "fields..."))

  created_files <- character()

  # Initialize progress bar (console; one tick per field)
  pb <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50)
  counter <- 0

  # Process each field: build MAX composite, then persist it.
  # Fields that fail either step are skipped (helpers return NULL and log);
  # the loop keeps going so one bad field never aborts the whole week.
  for (field_name in names(field_tiffs)) {
    counter <- counter + 1
    tiff_files <- field_tiffs[[field_name]]

    # Create composite
    composite <- create_field_weekly_composite(tiff_files, field_name)

    if (!is.null(composite)) {
      # Save
      saved_path <- save_field_weekly_mosaic(
        composite,
        output_dir,
        field_name,
        dates$week,
        dates$year
      )

      if (!is.null(saved_path)) {
        created_files <- c(created_files, saved_path)
      }
    }

    setTxtProgressBar(pb, counter)
  }

  close(pb)
  cat("\n")  # New line after progress bar
  safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))

  return(created_files)
}
-#' -#' @param raster_path Path to raster to split -#' @param output_dir Directory to save tiles -#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km) -#' @return List with tiles (file paths) and metadata -#' -split_raster_into_tiles <- function(raster_path, output_dir, tile_size_m = 5000) { - # Load raster - r <- terra::rast(raster_path) - - # Calculate how many tiles we need based on raster extent and tile size - x_range <- terra::ext(r)$xmax - terra::ext(r)$xmin - y_range <- terra::ext(r)$ymax - terra::ext(r)$ymin - - n_tiles_x <- ceiling(x_range / tile_size_m) - n_tiles_y <- ceiling(y_range / tile_size_m) - - safe_log(paste("Splitting into", n_tiles_x, "×", n_tiles_y, "tiles of", - tile_size_m / 1000, "km")) - - # Create output directory - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - - # Use terra::makeTiles() - it splits based on n = c(rows, cols) - tiles <- terra::makeTiles( - r, - n = c(n_tiles_y, n_tiles_x), # rows, cols - filename = file.path(output_dir, "tile_.tif"), - overwrite = TRUE - ) - - safe_log(paste("Created", length(tiles), "tile files")) - - return(list( - tiles = tiles, - n_tiles = length(tiles), - n_tiles_x = n_tiles_x, - n_tiles_y = n_tiles_y, - extent = terra::ext(r), - raster = raster_path - )) -} - - -#' Create weekly MAX mosaic using TRUE tile-based processing -#' -#' TILE-BASED WORKFLOW (Memory efficient): -#' 1. Calculate tile grid dynamically from extent and FIXED tile_size (e.g., 5 km) -#' 2. For EACH TILE across all 7 days: -#' - Load that tile from each daily file (small ~50 MB, not 1.3 GB) -#' - Create MAX composite for just that tile -#' 3. 
Reassemble all tiles into final full-extent mosaic -#' -#' SCALABILITY: Fixed 5 km tile size means bigger area = more tiles (automatic scaling) -#' -#' @param dates List from date_list() with days_filter -#' @param merged_final_dir Directory with daily merged full-extent TIFFs -#' @param final_output_dir Directory to store final reassembled mosaic -#' @param file_name_tif Output filename for final mosaic -#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km). Bigger area automatically gets more tiles. -#' @return Path to final reassembled weekly mosaic -#' -create_weekly_mosaic_tiled <- function(dates, merged_final_dir, - final_output_dir, file_name_tif, - tile_size_m = 5000) { - - # Get daily files for this week - daily_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") - daily_files <- daily_files[grepl(paste(dates$days_filter, collapse = "|"), daily_files)] - - if (length(daily_files) == 0) { - safe_log("No daily files found for this week", "ERROR") - return(NULL) - } - - safe_log(paste("Found", length(daily_files), "daily files for week", dates$week)) - - # Load first daily file to get extent and calculate tile grid - safe_log("Step 1: Loading first daily file to establish tile structure") - first_raster <- terra::rast(daily_files[1]) - - # Get CRS and convert tile_size_m to appropriate units - raster_crs <- terra::crs(first_raster) - - # If raster is in lat/lon (geographic), convert tile_size_m to degrees - # 1 degree latitude ≈ 111 km, so 5000 m ≈ 0.045 degrees - # Check for GEOGCRS (geographic coordinate system) in WKT string - is_geographic <- grepl("GEOGCRS|longlat|geographic|ANGLEUNIT.*degree", - as.character(raster_crs), ignore.case = TRUE) - - if (is_geographic) { - # Geographic CRS - convert meters to degrees - # At equator: 1 degree ≈ 111 km - tile_size_degrees <- tile_size_m / 111000 # 111 km per degree - safe_log(paste("Raster is in geographic CRS (lat/lon). 
Converting", tile_size_m / 1000, - "km to", round(tile_size_degrees, 4), "degrees")) - } else { - # Projected CRS - use meters directly - tile_size_degrees <- tile_size_m - safe_log(paste("Raster is in projected CRS. Using", tile_size_m / 1000, "km directly")) - } - - # Calculate n_tiles dynamically from extent and tile_size - x_range <- terra::ext(first_raster)$xmax - terra::ext(first_raster)$xmin - y_range <- terra::ext(first_raster)$ymax - terra::ext(first_raster)$ymin - - n_tiles_x <- ceiling(x_range / tile_size_degrees) - n_tiles_y <- ceiling(y_range / tile_size_degrees) - n_tiles_total <- n_tiles_x * n_tiles_y - - safe_log(paste("Step 2: Creating tile grid:", tile_size_m / 1000, "km tiles")) - safe_log(paste(" Extent:", round(x_range, 4), "° ×", round(y_range, 4), "°")) - safe_log(paste(" Grid:", n_tiles_y, "rows ×", n_tiles_x, "columns =", n_tiles_total, "tiles")) - - # Calculate tile extents mathematically (no need to save temp files) - extent <- terra::ext(first_raster) - x_min <- extent$xmin - y_min <- extent$ymin - - # Create list of tile extents - tile_extents <- list() - tile_idx <- 0 - - for (row in 1:n_tiles_y) { - for (col in 1:n_tiles_x) { - tile_idx <- tile_idx + 1 - - # Calculate this tile's bounds - tile_xmin <- x_min + (col - 1) * tile_size_degrees - tile_xmax <- min(tile_xmin + tile_size_degrees, extent$xmax) - tile_ymin <- y_min + (row - 1) * tile_size_degrees - tile_ymax <- min(tile_ymin + tile_size_degrees, extent$ymax) - - tile_extents[[tile_idx]] <- terra::ext(tile_xmin, tile_xmax, tile_ymin, tile_ymax) - } - } - - safe_log(paste("Calculated", length(tile_extents), "tile extents")) - - # Save tiles to Laravel storage directory - tile_storage_dir <- file.path("laravel_app", "storage", "app", "angata", "daily_tiles") - dir.create(tile_storage_dir, recursive = TRUE, showWarnings = FALSE) - - # For each tile, load from all daily files and create MAX - safe_log("Step 3: Processing tiles (load per-tile across all days, create MAX for each)") - 
tile_files_list <- list() - - for (tile_idx in seq_along(tile_extents)) { - if (tile_idx %% max(1, ceiling(n_tiles_total / 10)) == 0 || tile_idx == 1) { - safe_log(paste(" Processing tile", tile_idx, "of", n_tiles_total)) - } - - # Get this tile's extent - tile_extent <- tile_extents[[tile_idx]] - - # Load and crop all daily files to this tile extent - daily_tile_data <- list() - for (file_idx in seq_along(daily_files)) { - tryCatch({ - r <- terra::rast(daily_files[file_idx]) - cropped <- terra::crop(r, tile_extent, snap = "in") - daily_tile_data[[file_idx]] <- cropped - }, error = function(e) { - safe_log(paste("Warning: Could not crop tile", tile_idx, "from", - basename(daily_files[file_idx])), "WARNING") - }) - } - - if (length(daily_tile_data) == 0) { - safe_log(paste("No valid data for tile", tile_idx), "WARNING") - next - } - - # Create MAX composite for this tile - if (length(daily_tile_data) == 1) { - tile_max <- daily_tile_data[[1]] - } else { - collection <- terra::sprc(daily_tile_data) - tile_max <- terra::mosaic(collection, fun = "max") - } - - # Save tile to disk (keeps memory low) - tile_file <- file.path(tile_storage_dir, sprintf("tile_%04d.tif", tile_idx)) - terra::writeRaster(tile_max, tile_file, overwrite = TRUE) - tile_files_list[[tile_idx]] <- tile_file - } - - if (length(tile_files_list) == 0) { - safe_log("No tiles processed successfully", "ERROR") - return(NULL) - } - - safe_log(paste("Step 4: Reassembling", length(tile_files_list), "tiles from disk into full-extent mosaic")) - - # Load all tile files and reassemble - tile_rasters <- lapply(tile_files_list, terra::rast) - collection <- terra::sprc(tile_rasters) - final_mosaic <- terra::mosaic(collection, fun = "first") - - safe_log("Step 5: Saving final reassembled mosaic") - - # Save - dir.create(final_output_dir, recursive = TRUE, showWarnings = FALSE) - final_file_path <- file.path(final_output_dir, file_name_tif) - - tryCatch({ - terra::writeRaster(final_mosaic, final_file_path, overwrite 
= TRUE) - safe_log(paste("✓ Weekly mosaic saved to:", final_file_path)) - }, error = function(e) { - safe_log(paste("Error saving mosaic:", e$message), "ERROR") - return(NULL) - }) - - # Cleanup temporary tile files - unlink(tile_storage_dir, recursive = TRUE) - safe_log("Cleaned up temporary tile files") - - return(final_file_path) -} - -#' Load tile MAX rasters for a specific week (for per-tile analysis) -#' -#' @param week Week number -#' @param tile_dir Directory containing tile mosaics (week subdirectories) -#' @return List of tile rasters with tile_id metadata -#' -load_tile_mosaics <- function(week, tile_dir) { - week_dir <- file.path(tile_dir, paste0("week_", week)) - - if (!dir.exists(week_dir)) { - warning(paste("Tile directory not found:", week_dir)) - return(NULL) - } - - # Load all tile files - tile_files <- list.files(week_dir, pattern = "^tile_.*\\.tif$", full.names = TRUE) - - if (length(tile_files) == 0) { - warning("No tile files found in:", week_dir) - return(NULL) - } - - # Sort by tile ID - tile_numbers <- as.numeric(gsub(".*tile_(\\d+).*", "\\1", tile_files)) - tile_files <- tile_files[order(tile_numbers)] - - # Load rasters - tiles_list <- lapply(tile_files, terra::rast) - names(tiles_list) <- sprintf("tile_%03d", sort(tile_numbers)) - - safe_log(paste("Loaded", length(tiles_list), "tile mosaics for week", week)) - - return(tiles_list) -} diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R deleted file mode 100644 index dc7b778..0000000 --- a/r_app/40_mosaic_creation_utils.R +++ /dev/null @@ -1,827 +0,0 @@ -# MOSAIC_CREATION_UTILS.R -# ====================== -# Utility functions for creating weekly mosaics from daily satellite imagery. -# These functions support cloud cover assessment, date handling, and mosaic creation. 
- -#' Detect whether a project uses tile-based or single-file mosaic approach -#' -#' @param merged_final_tif_dir Directory containing merged_final_tif files -#' @return List with has_tiles (logical), detected_tiles (vector), total_files (count) -#' -detect_mosaic_mode <- function(merged_final_tif_dir) { - # Check if directory exists - if (!dir.exists(merged_final_tif_dir)) { - return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) - } - - # List all .tif files in merged_final_tif - tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE) - - if (length(tif_files) == 0) { - return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) - } - - # Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits) - # Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif - tile_pattern <- "_(\\d{2})\\.tif$" - tile_files <- tif_files[grepl(tile_pattern, tif_files)] - - has_tiles <- length(tile_files) > 0 - - return(list( - has_tiles = has_tiles, - detected_tiles = tile_files, - total_files = length(tif_files) - )) -} - -#' Safe logging function -#' @param message The message to log -#' @param level The log level (default: "INFO") -#' @return NULL (used for side effects) -#' -safe_log <- function(message, level = "INFO") { - if (exists("log_message")) { - log_message(message, level) - } else { - if (level %in% c("ERROR", "WARNING")) { - warning(message) - } else { - message(message) - } - } -} - -#' Generate a sequence of dates for processing -#' -#' @param end_date The end date for the sequence (Date object) -#' @param offset Number of days to look back from end_date -#' @return A list containing week number, year, and a sequence of dates for filtering -#' -date_list <- function(end_date, offset) { - # Input validation - if (!lubridate::is.Date(end_date)) { - end_date <- as.Date(end_date) - if (is.na(end_date)) { - stop("Invalid end_date provided. 
Expected a Date object or a string convertible to Date.") - } - } - - offset <- as.numeric(offset) - if (is.na(offset) || offset < 1) { - stop("Invalid offset provided. Expected a positive number.") - } - - # Calculate date range - offset <- offset - 1 # Adjust offset to include end_date - start_date <- end_date - lubridate::days(offset) - - # Extract week and year information - week <- lubridate::isoweek(end_date) - year <- lubridate::isoyear(end_date) - - # Generate sequence of dates - days_filter <- seq(from = start_date, to = end_date, by = "day") - days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering - - # Log the date range - safe_log(paste("Date range generated from", start_date, "to", end_date)) - - return(list( - "week" = week, - "year" = year, - "days_filter" = days_filter, - "start_date" = start_date, - "end_date" = end_date - )) -} - -#' Create a weekly mosaic from available VRT files -#' -#' @param dates List from date_list() with date range info -#' @param field_boundaries Field boundaries for image cropping -#' @param daily_vrt_dir Directory containing VRT files -#' @param merged_final_dir Directory with merged final rasters -#' @param output_dir Output directory for weekly mosaics -#' @param file_name_tif Output filename for the mosaic -#' @param create_plots Whether to create visualization plots (default: TRUE) -#' @return The file path of the saved mosaic -#' -create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir, - merged_final_dir, output_dir, file_name_tif, - create_plots = FALSE) { - # Find VRT files for the specified date range - vrt_list <- find_vrt_files(daily_vrt_dir, dates) - - # Find final raster files for fallback - raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") - - # Process the mosaic if VRT files are available - if (length(vrt_list) > 0) { - safe_log("VRT list created, assessing cloud cover for mosaic creation") - - # Calculate aggregated cloud 
cover statistics (returns data frame for image selection) - cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries) - - # Create mosaic based on cloud cover assessment - mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries) - - } else { - safe_log("No VRT files available for the date range, creating empty mosaic with NA values", "WARNING") - - # Create empty mosaic if no files are available - if (length(raster_files_final) == 0) { - stop("No VRT files or final raster files available to create mosaic") - } - - mosaic <- terra::rast(raster_files_final[1]) - mosaic <- terra::setValues(mosaic, NA) - mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE) - - names(mosaic) <- c("Red", "Green", "Blue", "NIR", "CI") - } - - # Save the mosaic (without mask files to avoid breaking other scripts) - file_path <- save_mosaic(mosaic, output_dir, file_name_tif, create_plots, save_mask = FALSE) - - safe_log(paste("Weekly mosaic processing completed for week", dates$week)) - - return(file_path) -} - -#' Find VRT files within a date range -#' -#' @param vrt_directory Directory containing VRT files -#' @param dates List from date_list() function containing days_filter -#' @return Character vector of VRT file paths -#' -find_vrt_files <- function(vrt_directory, dates) { - # Get all VRT files in directory - vrt_files <- list.files(here::here(vrt_directory), full.names = TRUE) - - if (length(vrt_files) == 0) { - warning("No VRT files found in directory: ", vrt_directory) - return(character(0)) - } - - # Filter files by dates - vrt_list <- purrr::map(dates$days_filter, ~ vrt_files[grepl(pattern = .x, x = vrt_files)]) %>% - purrr::compact() %>% - purrr::flatten_chr() - - # Log results - safe_log(paste("Found", length(vrt_list), "VRT files for the date range")) - - return(vrt_list) -} - -#' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files -#' -#' @param vrt_list List of VRT file paths 
(used to extract dates for TIF file lookup) -#' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif) -#' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only -#' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection) -#' -count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) { - if (length(vrt_list) == 0) { - warning("No VRT files provided for cloud coverage calculation") - return(NULL) - } - - tryCatch({ - # Extract dates from VRT filenames to find corresponding TIF files - # VRT filenames are like "merged2025-12-18.vrt", TIF filenames are like "2025-12-18.tif" - tif_dates <- gsub(".*([0-9]{4}-[0-9]{2}-[0-9]{2}).*", "\\1", basename(vrt_list)) - - # Build list of actual TIF files to use - tif_files <- paste0(here::here(merged_final_dir), "/", tif_dates, ".tif") - - # Check which TIF files exist - tif_exist <- file.exists(tif_files) - if (!any(tif_exist)) { - warning("No TIF files found in directory: ", merged_final_dir) - return(NULL) - } - - tif_files <- tif_files[tif_exist] - safe_log(paste("Found", length(tif_files), "TIF files for cloud coverage assessment")) - - # Initialize list to store aggregated results - aggregated_results <- list() - - # Process each TIF file - for (tif_idx in seq_along(tif_files)) { - tif_file <- tif_files[tif_idx] - - tryCatch({ - # Load the TIF file (typically has 5 bands: R, G, B, NIR, CI) - current_raster <- terra::rast(tif_file) - - # Extract the CI band (last band) - ci_band <- current_raster[[terra::nlyr(current_raster)]] - - # Create a unique field mask for THIS raster's extent - # This handles cases where rasters have different extents due to missing data - total_notna <- NA_real_ - total_pixels <- NA_real_ - - if (!is.null(field_boundaries)) { - tryCatch({ - # Create mask specific to this raster's grid - field_mask <- terra::rasterize(field_boundaries, 
ci_band, field = 1) - - # Count pixels within field boundaries (for this specific raster) - total_pixels <- terra::global(field_mask, fun = "notNA")$notNA - - # Cloud coverage calculated only over field areas - ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA) - total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA - - }, error = function(e) { - # If field mask creation fails, fall back to entire raster - safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING") - }) - } - - # If field mask failed, use entire raster - if (is.na(total_notna)) { - total_notna <- terra::global(ci_band, fun = "notNA")$notNA - total_pixels <- terra::ncell(ci_band) - } - - # Calculate cloud coverage percentage (missing = clouds) - missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) - - aggregated_results[[tif_idx]] <- data.frame( - filename = tif_file, - notNA = total_notna, - total_pixels = total_pixels, - missing_pixels_percentage = missing_pct, - thres_5perc = as.integer(missing_pct < 5), - thres_40perc = as.integer(missing_pct < 45), - stringsAsFactors = FALSE - ) - - }, error = function(e) { - safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING") - aggregated_results[[tif_idx]] <<- data.frame( - filename = tif_file, - notNA = NA_real_, - total_pixels = NA_real_, - missing_pixels_percentage = 100, - thres_5perc = 0, - thres_40perc = 0, - stringsAsFactors = FALSE - ) - }) - } - - # Combine all aggregated results - aggregated_df <- if (length(aggregated_results) > 0) { - do.call(rbind, aggregated_results) - } else { - data.frame() - } - - # Log results - safe_log(paste("Cloud coverage assessment completed for", length(vrt_list), "images")) - - # Return aggregated data only - return(aggregated_df) - - }, error = function(e) { - warning("Error in cloud coverage calculation: ", e$message) - return(NULL) - }) -} - -#' Create a mosaic from merged_final_tif files based on 
cloud coverage -#' -#' @param tif_files List of processed TIF files (5 bands: R, G, B, NIR, CI) -#' @param cloud_coverage_stats Cloud coverage statistics from count_cloud_coverage() -#' @param field_boundaries Field boundaries for masking (optional) -#' @return A SpatRaster object with 5 bands (Red, Green, Blue, NIR, CI) -#' -create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NULL) { - # If no TIF files, return NULL - if (length(tif_files) == 0) { - safe_log("No TIF files available for mosaic creation", "ERROR") - return(NULL) - } - - # Validate cloud coverage stats - mosaic_type <- "Unknown" # Track what type of mosaic is being created - - if (is.null(cloud_coverage_stats) || nrow(cloud_coverage_stats) == 0) { - safe_log("No cloud coverage statistics available, using all files", "WARNING") - rasters_to_use <- tif_files - mosaic_type <- paste("all", length(tif_files), "available images") - } else { - # Determine best rasters to use based on cloud coverage thresholds - # Count how many images meet each threshold - num_5perc <- sum(cloud_coverage_stats$thres_5perc, na.rm = TRUE) - num_40perc <- sum(cloud_coverage_stats$thres_40perc, na.rm = TRUE) - - if (num_5perc > 1) { - # Multiple images with <5% cloud coverage - safe_log(paste("Creating max composite from", num_5perc, "cloud-free images (<5% clouds)")) - mosaic_type <- paste(num_5perc, "cloud-free images (<5% clouds)") - best_coverage <- which(cloud_coverage_stats$thres_5perc > 0) - - } else if (num_5perc == 1) { - # Single image with <5% cloud coverage - safe_log("Using single cloud-free image (<5% clouds)") - mosaic_type <- "single cloud-free image (<5% clouds)" - best_coverage <- which(cloud_coverage_stats$thres_5perc > 0) - - } else if (num_40perc > 1) { - # Multiple images with <40% cloud coverage - safe_log(paste("Creating max composite from", num_40perc, "partially cloudy images (<40% clouds)"), "WARNING") - mosaic_type <- paste(num_40perc, "partially cloudy images (<40% 
clouds)") - best_coverage <- which(cloud_coverage_stats$thres_40perc > 0) - - } else if (num_40perc == 1) { - # Single image with <40% cloud coverage - safe_log("Using single partially cloudy image (<40% clouds)", "WARNING") - mosaic_type <- "single partially cloudy image (<40% clouds)" - best_coverage <- which(cloud_coverage_stats$thres_40perc > 0) - - } else { - # No cloud-free images available - safe_log("No cloud-free images available, using all images", "WARNING") - mosaic_type <- paste("all", nrow(cloud_coverage_stats), "available images") - best_coverage <- seq_len(nrow(cloud_coverage_stats)) - } - - # Get filenames of best-coverage images - # Match by full filename from cloud stats to TIF files - rasters_to_use <- character() - for (idx in best_coverage) { - # Get the full filename from cloud coverage stats - cc_filename <- cloud_coverage_stats$filename[idx] - - # Find matching TIF file by full filename - matching_tif <- NULL - for (tif_file in tif_files) { - tif_basename <- basename(tif_file) - if (tif_basename == cc_filename) { - matching_tif <- tif_file - break - } - } - - if (!is.null(matching_tif)) { - rasters_to_use <- c(rasters_to_use, matching_tif) - } else { - safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", cc_filename), "WARNING") - } - } - - if (length(rasters_to_use) == 0) { - safe_log("Could not match cloud coverage stats to TIF files, using all files", "WARNING") - rasters_to_use <- tif_files - mosaic_type <- paste("all", length(tif_files), "available images") - } - } - - # Load and mosaic the selected rasters - if (length(rasters_to_use) == 1) { - # Single file - just load it - safe_log(paste("Using single image for mosaic:", basename(rasters_to_use))) - mosaic <- terra::rast(rasters_to_use[1]) - } else { - # Multiple files - merge handles different extents/grids automatically - safe_log(paste("Creating mosaic from", length(rasters_to_use), "images")) - - # Load all rasters with error handling - only keep 
successful loads - all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) { - tryCatch({ - terra::rast(f) - }, error = function(e) { - safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING") - NULL # Return NULL on error, will be filtered out - }) - })) - - # Check what we loaded - safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files")) - - if (length(all_rasters) == 0) { - safe_log("No valid rasters to merge", "WARNING") - return(NULL) - } - - # Merge all rasters (handles different extents and grids automatically) - if (length(all_rasters) == 1) { - mosaic <- all_rasters[[1]] - safe_log("Using single raster after filtering") - } else { - # Create max composite: take maximum value at each pixel across all dates - # This skips clouds (low/zero CI values) in favor of clear pixels from other dates - mosaic <- tryCatch({ - safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds")) - - # Check if all rasters have identical grids (extent and resolution) - # This is likely for per-tile mosaics from the same tiling scheme - reference_raster <- all_rasters[[1]] - ref_ext <- terra::ext(reference_raster) - ref_res <- terra::res(reference_raster) - - grids_match <- all(sapply(all_rasters[-1], function(r) { - isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) && - isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6)) - })) - - if (grids_match) { - # All rasters have matching grids - no cropping/resampling needed! 
- safe_log("All rasters have identical grids - stacking directly for max composite") - raster_collection <- terra::sprc(all_rasters) - max_mosaic <- terra::mosaic(raster_collection, fun = "max") - } else { - # Grids don't match - need to crop and resample - safe_log("Rasters have different grids - cropping and resampling to common extent") - - # Get extent from field boundaries if available, otherwise use raster union - if (!is.null(field_boundaries)) { - crop_extent <- terra::ext(field_boundaries) - safe_log("Using field boundaries extent for consistent area across all dates") - } else { - # Use union of all extents (covers all data) - crop_extent <- terra::ext(all_rasters[[1]]) - for (i in 2:length(all_rasters)) { - crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]])) - } - safe_log("Using raster union extent") - } - - # Crop all rasters to common extent - cropped_rasters <- lapply(all_rasters, function(r) { - terra::crop(r, crop_extent) - }) - - # Resample all cropped rasters to match the first one's grid - reference_grid <- cropped_rasters[[1]] - - aligned_rasters <- lapply(cropped_rasters, function(r) { - if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) && - isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) { - return(r) # Already aligned - } - terra::resample(r, reference_grid, method = "near") - }) - - # Create max composite using mosaic on aligned rasters - raster_collection <- terra::sprc(aligned_rasters) - max_mosaic <- terra::mosaic(raster_collection, fun = "max") - } - - max_mosaic - }, error = function(e) { - safe_log(paste("Max composite creation failed:", e$message), "WARNING") - safe_log("Using first raster only as fallback") - all_rasters[[1]] - }) - safe_log(paste("Max composite created - taking clearest pixel at each location")) - } - - # Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI - required_bands <- c("Red", "Green", "Blue", "NIR", "CI") - 
available_bands <- names(mosaic) - - # Check if all required bands are present - if (!all(required_bands %in% available_bands)) { - safe_log(paste("Warning: Not all required bands found. Available:", paste(available_bands, collapse = ", ")), "WARNING") - } - - # Select only the required bands in the correct order - if (all(required_bands %in% available_bands)) { - mosaic <- mosaic[[required_bands]] - safe_log("Selected Red, Green, Blue, NIR, CI bands") - } else { - safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5 (R, G, B, NIR, CI)"), "WARNING") - if (terra::nlyr(mosaic) > 5) { - # Keep only first 5 bands as fallback - mosaic <- terra::subset(mosaic, 1:5) - safe_log("Keeping only first 5 bands as fallback") - } - } - } - - # Crop/mask to field boundaries if provided - if (!is.null(field_boundaries)) { - tryCatch({ - mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE) - safe_log("Mosaic cropped to field boundaries") - }, error = function(e) { - safe_log(paste("Could not crop to field boundaries:", e$message), "WARNING") - # Return uncropped mosaic - }) - } - - # Log final mosaic summary - safe_log(paste("✓ Mosaic created from", mosaic_type, "-", terra::nlyr(mosaic), - "bands,", nrow(mosaic), "x", ncol(mosaic), "pixels")) - - return(mosaic) -} - -#' Save a mosaic raster to disk -#' -#' @param mosaic_raster A SpatRaster object to save -#' @param output_dir Directory to save the output -#' @param file_name Filename for the output raster -#' @param plot_result Whether to create visualizations (default: FALSE) -#' @param save_mask Whether to save cloud masks separately (default: FALSE) -#' @return The file path of the saved raster -#' -save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALSE, save_mask = FALSE) { - # Validate input - if (is.null(mosaic_raster)) { - stop("No mosaic raster provided to save") - } - - # Create output directory if it doesn't exist - dir.create(output_dir, recursive = TRUE, 
showWarnings = FALSE) - - # Create full file path - file_path <- here::here(output_dir, file_name) - - # Get cloud mask if it exists - cloud_mask <- attr(mosaic_raster, "cloud_mask") - - # Save raster - terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE) - - # Save cloud mask if available and requested - if (!is.null(cloud_mask) && save_mask) { - # Create mask filename by adding _mask before extension - mask_file_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name) - mask_file_path <- here::here(output_dir, mask_file_name) - - # Save the mask - terra::writeRaster(cloud_mask, mask_file_path, overwrite = TRUE) - safe_log(paste("Cloud/shadow mask saved to:", mask_file_path)) - } else if (!is.null(cloud_mask)) { - safe_log("Cloud mask available but not saved (save_mask = FALSE)") - } - - # Create plots if requested - if (plot_result) { - # Plot the CI band - if ("CI" %in% names(mosaic_raster)) { - terra::plot(mosaic_raster$CI, main = paste("CI map", file_name)) - } - - # Plot RGB image - if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) { - terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name)) - } - - # Plot cloud mask if available - if (!is.null(cloud_mask)) { - terra::plot(cloud_mask, main = paste("Cloud/shadow mask", file_name), - col = c("red", "green")) - } - - # If we have both RGB and cloud mask, create a side-by-side comparison - if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster)) && !is.null(cloud_mask)) { - old_par <- par(mfrow = c(1, 2)) - terra::plotRGB(mosaic_raster, main = "RGB Image") - - # Create a colored mask for visualization (red = cloud/shadow, green = clear) - mask_plot <- cloud_mask - terra::plot(mask_plot, main = "Cloud/Shadow Mask", col = c("red", "green")) - par(old_par) - } - } - - # Log save completion - safe_log(paste("Mosaic saved to:", file_path)) - - return(file_path) -} - -#' Create weekly mosaic from pre-split tiles with MAX aggregation -#' -#' This function processes tiles created by Script 
01 and processed by Script 02. -#' For each of the 25 tiles independently: -#' 1. Collects that tile from all dates in the range -#' 2. Calculates cloud coverage per date -#' 3. Uses create_mosaic logic to select best dates (cloud-clean preferred) -#' 4. Creates MAX composite for that tile -#' 5. Saves to weekly_tile_max/tile_XX.tif -#' -#' Input: merged_final_tif/[DATE]/[TILE_01.tif, TILE_02.tif, ..., TILE_25.tif] -#' Output: weekly_tile_max/tile_01.tif through tile_25.tif (25 weekly MAX tiles) -#' -#' @param dates List from date_list() containing days_filter vector -#' @param merged_final_dir Directory containing processed tiles (merged_final_tif) -#' @param tile_output_dir Directory to save weekly MAX tiles (weekly_tile_max) -#' @param field_boundaries Field boundaries for cloud coverage calculation (optional) -#' @return List of paths to created tile files -#' -create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) { - - safe_log("Starting per-tile mosaic creation with cloud-based date selection...") - - # Create output directory if needed - dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE) - - # Step 1: Discover all tiles from all dates and group by tile ID - tile_groups <- list() # Structure: tile_groups$tile_01 = list of files for that tile across dates - - for (date in dates$days_filter) { - date_dir <- file.path(merged_final_dir, date) - - if (!dir.exists(date_dir)) { - safe_log(paste(" Skipping date:", date, "- directory not found"), "WARNING") - next - } - - tile_files <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE) - - if (length(tile_files) == 0) { - safe_log(paste(" No tiles found for date:", date), "WARNING") - next - } - - # Extract tile ID from each filename (e.g., "2026-01-02_01.tif" → tile ID is "01") - for (tile_file in tile_files) { - # Extract tile number from filename - tile_basename <- basename(tile_file) - tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", 
tile_basename) - - if (!tile_id %in% names(tile_groups)) { - tile_groups[[tile_id]] <- list() - } - tile_groups[[tile_id]][[length(tile_groups[[tile_id]]) + 1]] <- tile_file - } - } - - if (length(tile_groups) == 0) { - stop("No tiles found in date range") - } - - safe_log(paste("Found", length(tile_groups), "unique tiles across all dates")) - - # Step 2: Process each tile independently - created_tiles <- character() - - for (tile_id in names(tile_groups)) { - tile_files_for_this_id <- unlist(tile_groups[[tile_id]]) - - safe_log(paste("Processing tile", tile_id, "with", length(tile_files_for_this_id), "dates")) - - # Step 2a: Calculate cloud coverage for this tile across all dates - cloud_stats_this_tile <- tryCatch({ - count_cloud_coverage_for_tile( - tile_files = tile_files_for_this_id, - field_boundaries = field_boundaries - ) - }, error = function(e) { - safe_log(paste(" Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING") - NULL - }) - - if (is.null(cloud_stats_this_tile) || nrow(cloud_stats_this_tile) == 0) { - safe_log(paste(" No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING") - cloud_stats_this_tile <- NULL - } - - # Step 2b: Create MAX mosaic for this tile using create_mosaic logic - tile_mosaic <- tryCatch({ - create_mosaic( - tif_files = tile_files_for_this_id, - cloud_coverage_stats = cloud_stats_this_tile, - field_boundaries = NULL # Don't crop individual tiles - ) - }, error = function(e) { - safe_log(paste(" Error creating mosaic for tile", tile_id, "-", e$message), "WARNING") - NULL - }) - - if (is.null(tile_mosaic)) { - safe_log(paste(" Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING") - next - } - - # DEBUG: Check mosaic content before saving - safe_log(paste(" DEBUG: Mosaic tile", tile_id, "dimensions:", nrow(tile_mosaic), "x", ncol(tile_mosaic))) - safe_log(paste(" DEBUG: Mosaic tile", tile_id, "bands:", terra::nlyr(tile_mosaic))) - - # Check first band values - 
band1 <- tile_mosaic[[1]] - band1_min <- terra::global(band1, fun = "min", na.rm = TRUE)$min - band1_max <- terra::global(band1, fun = "max", na.rm = TRUE)$max - safe_log(paste(" DEBUG: Band 1 MIN=", round(band1_min, 2), "MAX=", round(band1_max, 2))) - - # Step 2c: Save this tile's weekly MAX mosaic - # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif for week 2, 2026, tile 1) - tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_", - sprintf("%02d", as.integer(tile_id)), ".tif") - tile_output_path <- file.path(tile_output_dir, tile_filename) - - tryCatch({ - terra::writeRaster(tile_mosaic, tile_output_path, overwrite = TRUE) - safe_log(paste(" ✓ Saved tile", tile_id, "to:", tile_filename)) - created_tiles <- c(created_tiles, tile_output_path) - }, error = function(e) { - safe_log(paste(" Error saving tile", tile_id, "-", e$message), "WARNING") - }) - } - - safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir)) - - return(created_tiles) -} - -#' Calculate cloud coverage for a single tile across multiple dates -#' -#' Helper function for per-tile cloud assessment. -#' Takes tile files from different dates and calculates cloud coverage for each. -#' Cloud coverage is calculated ONLY within field boundaries, so total_pixels -#' varies per tile based on which fields are present in that tile area. 
-#' -#' @param tile_files Character vector of tile file paths from different dates -#' @param field_boundaries Field boundaries for analysis (required for per-field counting) -#' @return Data frame with cloud stats for each date/tile -#' -count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) { - if (length(tile_files) == 0) { - warning("No tile files provided for cloud coverage calculation") - return(NULL) - } - - aggregated_results <- list() - - for (idx in seq_along(tile_files)) { - tile_file <- tile_files[idx] - - tryCatch({ - # Load the tile - current_raster <- terra::rast(tile_file) - - # Extract the CI band (last band in 5-band output) - ci_band <- current_raster[[terra::nlyr(current_raster)]] - - # Calculate cloud coverage within field boundaries - if (!is.null(field_boundaries)) { - # Create a reference raster template (same extent/resolution as ci_band, but independent of its data) - # This ensures field_mask shows the potential field area even if ci_band is entirely cloudy (all NA) - ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band), - crs = terra::crs(ci_band)) - terra::values(ref_template) <- 1 # Fill entire raster with 1s - - # Crop and mask to field boundaries: keeps 1 inside fields, NA outside - # This is independent of ci_band's data - represents the potential field area - field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE) - - # Count total potential field pixels from the mask (independent of clouds) - total_pixels <- terra::global(field_mask, fun = "notNA")$notNA - - # Now crop and mask CI band to field boundaries to count actual valid (non-cloud) pixels - ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE) - total_notna <- terra::global(ci_masked, fun = "notNA")$notNA - } else { - # If no field boundaries provided, use entire tile - total_notna <- terra::global(ci_band, fun = "notNA")$notNA - total_pixels <- terra::ncell(ci_band) - } - - # Cloud coverage 
percentage (missing = clouds) - missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) - - aggregated_results[[idx]] <- data.frame( - filename = basename(tile_file), # Keep full filename: 2026-01-07_03.tif - notNA = total_notna, - total_pixels = total_pixels, - missing_pixels_percentage = missing_pct, - thres_5perc = as.integer(missing_pct < 5), - thres_40perc = as.integer(missing_pct < 45), - stringsAsFactors = FALSE - ) - - }, error = function(e) { - safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING") - aggregated_results[[idx]] <<- data.frame( - filename = tile_file, - notNA = NA_real_, - total_pixels = NA_real_, - missing_pixels_percentage = 100, - thres_5perc = 0, - thres_40perc = 0, - stringsAsFactors = FALSE - ) - }) - } - - # Combine results - aggregated_df <- if (length(aggregated_results) > 0) { - do.call(rbind, aggregated_results) - } else { - data.frame() - } - - return(aggregated_df) -} - diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 82add7a..65e4bae 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -1,28 +1,55 @@ -# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION) # ============================================================================ -# UNIFIED KPI CALCULATION SCRIPT -# -# This script combines: -# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses) -# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators) +# SCRIPT 80: Key Performance Indicator (KPI) Calculation +# ============================================================================ +# PURPOSE: +# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes +# field uniformity, growth trends (4-week and 8-week), area change detection, +# TCH forecasts, stress identification, and weed presence. Generates +# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting. 
# -# FEATURES: -# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.) -# - Farm-level KPI calculation (6 metrics for executive overview) -# - Parallel processing (tile-aware, 1000+ fields supported) -# - Comprehensive Excel + RDS + CSV exports (21 columns per spec) -# - Test mode for development - -# CRITICAL INTEGRATIONS: +# INPUT DATA: +# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif +# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx) +# - Source 3: Historical field stats (RDS from previous weeks) # -# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307) -# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv -# Columns: field, imminent_prob, detected_prob, week, year -# [✓] LEFT JOIN to field_analysis_df by field -# [✓] Use actual harvest probability data instead of placeholder +# OUTPUT DATA: +# - Destination: laravel_app/storage/app/{project}/output/ +# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds) +# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata # -# 2. 
AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23) -# [✓] Load harvest.xlsx with planting_date (season_start) +# USAGE: +# Rscript 80_calculate_kpis.R [project] [week] [year] +# +# Example (Windows PowerShell): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026 +# +# PARAMETERS: +# - project: Project name (character) - angata, chemba, xinavane, esa, simba +# - week: ISO week number (numeric, 1-53, default current week) +# - year: ISO year (numeric, default current year) +# +# CLIENT TYPES: +# - cane_supply (ANGATA): Yes - uses 80_utils_cane_supply.R (placeholder) +# - agronomic_support (AURA): Yes - uses 80_utils_agronomic_support.R (6 KPI funcs) +# +# DEPENDENCIES: +# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep +# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R +# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx) +# - Input data: Weekly mosaic TIFFs (Script 40 output) +# - Data directories: weekly_mosaic/, output/ (created if missing) +# +# NOTES: +# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast, +# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling % +# - Client-aware: Conditional sourcing based on client_config$script_90_compatible +# - Exports: 21-column output with field-level metrics, phase, status, alerts +# - Supports 4-week and 8-week trend analysis from historical RDS cache +# - Critical dependency for Scripts 90/91 (reporting/dashboards) +# - Uses Moran's I for spatial clustering detection (weed/stress patterns) +# +# +# ============================================================================ # [✓] Extract planting dates per field # [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks") # @@ -96,48 +123,77 @@ WEEKS_FOR_CV_TREND_LONG <- 8 # ============================================================================ 
suppressPackageStartupMessages({ - library(here) - library(sf) - library(terra) - library(dplyr) - library(tidyr) - library(lubridate) - library(readr) - library(readxl) - library(writexl) - library(purrr) - library(furrr) - library(future) - library(caret) - library(CAST) - library(randomForest) + # File path handling + library(here) # For relative path resolution (platform-independent file paths) + + # Spatial data handling + library(sf) # For reading/manipulating field boundaries (GeoJSON) + library(terra) # For raster operations (reading mosaic TIFFs) + + # Data manipulation + library(dplyr) # For data wrangling (filter, mutate, group_by, summarize) + library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather) + library(lubridate) # For date/time operations (week extraction, date arithmetic) + + # File I/O + library(readr) # For reading CSV files (harvest predictions from Python) + library(readxl) # For reading harvest.xlsx (harvest dates for field mapping) + library(writexl) # For writing Excel outputs (KPI summary tables) + library(progress) # For progress bars during field processing + + # ML/Analysis (optional - only for harvest model inference) tryCatch({ - library(torch) + library(torch) # For PyTorch model inference (harvest readiness prediction) }, error = function(e) { message("Note: torch package not available - harvest model inference will be skipped") }) }) +# ============================================================================ +# LOAD CONFIGURATION - MUST BE DONE FIRST +# ============================================================================ +# Parameters must be loaded early to determine client type and paths + +tryCatch({ + source(here("r_app", "parameters_project.R")) +}, error = function(e) { + stop("Error loading parameters_project.R: ", e$message) +}) + +# Get client configuration from global project setup +# NOTE: This cannot be done until parameters_project.R is sourced +# We determine client_type from the current 
project_dir (if running in main() context) +# For now, set a placeholder that will be overridden in main() +if (exists("project_dir", envir = .GlobalEnv)) { + temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv)) +} else { + temp_client_type <- "cane_supply" # Safe default +} +temp_client_config <- get_client_kpi_config(temp_client_type) + # ============================================================================ # LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES # ============================================================================ - +# All clients use the common utilities (shared statistical functions, reporting) tryCatch({ - source(here("r_app", "80_weekly_stats_utils.R")) + source(here("r_app", "80_utils_common.R")) }, error = function(e) { - stop("Error loading 80_weekly_stats_utils.R: ", e$message) + stop("Error loading 80_utils_common.R: ", e$message) +}) + +# Load both client-specific utilities (functions will be available for both workflows) +# This avoids needing to determine client type at startup time +message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...") +tryCatch({ + source(here("r_app", "80_utils_agronomic_support.R")) +}, error = function(e) { + stop("Error loading 80_utils_agronomic_support.R: ", e$message) }) tryCatch({ - source(here("r_app", "80_report_building_utils.R")) + source(here("r_app", "80_utils_cane_supply.R")) }, error = function(e) { - stop("Error loading 80_report_building_utils.R: ", e$message) -}) - -tryCatch({ - source(here("r_app", "kpi_utils.R")) -}, error = function(e) { - stop("Error loading kpi_utils.R: ", e$message) + stop("Error loading 80_utils_cane_supply.R: ", e$message) }) # ============================================================================ @@ -176,9 +232,6 @@ STATUS_TRIGGERS <- data.frame( ) -# ============================================================================ -# MAIN -# 
============================================================================ main <- function() { # Parse command-line arguments @@ -187,7 +240,8 @@ main <- function() { # end_date (arg 1) # Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date() end_date <- if (length(args) >= 1 && !is.na(args[1])) { - as.Date(args[1]) + # Parse date explicitly in YYYY-MM-DD format from command line + as.Date(args[1], format = "%Y-%m-%d") } else if (exists("end_date", envir = .GlobalEnv)) { global_date <- get("end_date", envir = .GlobalEnv) # Check if it's a valid Date with length > 0 @@ -233,7 +287,7 @@ main <- function() { message(strrep("=", 70)) message("Date:", format(end_date, "%Y-%m-%d")) message("Project:", project_dir) - message("Mode: Per-field analysis (SC-64) + Farm-level KPIs") + message("Mode: Conditional KPI calculation based on client type") message("") # Load configuration and utilities @@ -245,48 +299,151 @@ main <- function() { stop("Error loading parameters_project.R: ", e$message) }) + # Initialize project directories and configuration + setup <- setup_project_directories(project_dir) + + # DETERMINE CLIENT TYPE AND KPI CONFIGURATION + client_type <- get_client_type(project_dir) + client_config <- get_client_kpi_config(client_type) + + # Assign to global environment so utilities and downstream scripts can access it + assign("client_config", client_config, envir = .GlobalEnv) + + message("Client Type:", client_type) + message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", ")) + message("Output Formats:", paste(client_config$outputs, collapse = ", ")) + + # Use centralized paths from setup object (no need for file.path calls) + weekly_mosaic <- setup$weekly_mosaic_dir + daily_vals_dir <- setup$daily_ci_vals_dir + tryCatch({ source(here("r_app", "30_growth_model_utils.R")) }, error = function(e) { warning("30_growth_model_utils.R not found - yield prediction KPI will use 
placeholder data") }) - # ========== PER-FIELD ANALYSIS (SC-64) ========== + # CONDITIONAL EXECUTION BASED ON CLIENT TYPE + # ============================================ - message("\n", strrep("-", 70)) - message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") - message(strrep("-", 70)) - - # Calculate ISO week numbers and ISO years using helper from kpi_utils.R + if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) { + # AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility + message("\n", strrep("=", 70)) + message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)") + message(strrep("=", 70)) + + # Prepare inputs for KPI calculation (already created by setup_project_directories) + reports_dir_kpi <- setup$kpi_reports_dir + cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir + + # Load field boundaries for AURA workflow (use data_dir from setup) + message("\nLoading field boundaries for AURA KPI calculation...") + tryCatch({ + boundaries_result <- load_field_boundaries(setup$data_dir) + + if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { + field_boundaries_sf <- boundaries_result$field_boundaries_sf + } else { + field_boundaries_sf <- boundaries_result + } + + if (nrow(field_boundaries_sf) == 0) { + stop("No fields loaded from boundaries") + } + + message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields")) + }, error = function(e) { + stop("ERROR loading field boundaries: ", e$message) + }) + + # Load harvesting data + if (!exists("harvesting_data")) { + warning("harvesting_data not loaded. 
TCH KPI will use placeholder values.") + harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) + } + + # Extract current week/year from end_date + current_week <- as.numeric(format(end_date, "%V")) + current_year <- as.numeric(format(end_date, "%G")) + + # Call with correct signature + kpi_results <- calculate_all_kpis( + field_boundaries_sf = field_boundaries_sf, + current_week = current_week, + current_year = current_year, + current_mosaic_dir = setup$weekly_mosaic_dir, + harvesting_data = harvesting_data, + ci_rds_path = cumulative_CI_vals_dir, + output_dir = reports_dir_kpi + ) + + cat("\n=== AURA KPI CALCULATION COMPLETE ===\n") + cat("Summary tables saved for Script 90 integration\n") + cat("Output directory:", reports_dir_kpi, "\n\n") + + } else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) { + # CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment + message("\n", strrep("=", 70)) + message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") + message(strrep("=", 70)) + + # Set reports_dir for CANE_SUPPLY workflow (used by export functions) + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + + # Continue with existing per-field analysis code below + + message("\n", strrep("-", 70)) + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") + message(strrep("-", 70)) weeks <- calculate_week_numbers(end_date) current_week <- weeks$current_week - current_iso_year <- weeks$current_iso_year + current_year <- weeks$current_year previous_week <- weeks$previous_week - previous_iso_year <- weeks$previous_iso_year - - message(paste("Week:", current_week, "/ ISO Year:", current_iso_year)) + previous_year <- weeks$previous_year - # Find tile files - approach from Script 20 - message("Finding tile files...") - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, current_iso_year) + message(paste("Week:", current_week, "/ Year (ISO 
8601):", year)) - # Detect grid size subdirectory - detected_grid_size <- NA - if (dir.exists(weekly_tile_max)) { - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - detected_grid_size <- grid_patterns[1] - mosaic_dir <- file.path(weekly_tile_max, detected_grid_size) - message(paste(" Using grid-size subdirectory:", detected_grid_size)) + # Find per-field weekly mosaics + message("Finding per-field weekly mosaics...") + single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year) + + if (!dir.exists(weekly_mosaic)) { + stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, + "\nScript 40 (mosaic creation) must be run first.")) + } + + field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + if (length(field_dirs) == 0) { + stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, + "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) + } + + # Verify we have mosaics for this week + single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year) + per_field_files <- c() + for (field in field_dirs) { + field_mosaic_dir <- file.path(weekly_mosaic, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files <- c(per_field_files, files) } } - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) - if (length(tile_files) == 0) { - stop(paste("No tile files found for week", current_week, current_iso_year, "in", mosaic_dir)) + if (length(per_field_files) == 0) { + stop(paste("ERROR: No mosaics found for week", current_week, "year", year, + "\nExpected pattern:", single_file_pattern, + "\nChecked:", weekly_mosaic)) } - message(paste(" Found", length(tile_files), "tiles")) + + message(paste(" ✓ Found", 
length(per_field_files), "per-field weekly mosaics")) + + mosaic_mode <- "per-field" + mosaic_dir <- weekly_mosaic + + # Load field boundaries tryCatch({ @@ -316,10 +473,11 @@ main <- function() { # Only auto-generate on first call (not in recursive calls from within load_historical_field_data) allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) - historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, + historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir, num_weeks = num_weeks_to_load, auto_generate = allow_auto_gen, - field_boundaries_sf = field_boundaries_sf) + field_boundaries_sf = field_boundaries_sf, + daily_vals_dir = daily_vals_dir) # Load harvest.xlsx for planting dates (season_start) message("\nLoading harvest data from harvest.xlsx for planting dates...") @@ -353,14 +511,15 @@ main <- function() { ) } - # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile - # ============================================================================ - # NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends - # ============================================================================ + # Build per-field configuration + message("\nPreparing mosaic configuration for statistics calculation...") + message(" ✓ Using per-field mosaic architecture (1 TIFF per field)") - # Build tile grid (needed by calculate_field_statistics) - message("\nBuilding tile grid for current week...") - tile_grid <- build_tile_grid(mosaic_dir, current_week, current_iso_year) + # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif + field_grid <- list( + mosaic_dir = mosaic_dir, + mode = "per-field" + ) message("\nUsing modular RDS-based approach for weekly statistics...") @@ -371,7 +530,7 @@ main <- function() { year = current_iso_year, project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = tile_grid$mosaic_dir, + 
mosaic_dir = field_grid$mosaic_dir, reports_dir = reports_dir, report_date = end_date ) @@ -386,10 +545,10 @@ main <- function() { prev_stats <- load_or_calculate_weekly_stats( week_num = previous_week, - year = previous_iso_year, + year = previous_year, project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = tile_grid$mosaic_dir, + mosaic_dir = field_grid$mosaic_dir, reports_dir = reports_dir, report_date = prev_report_date ) @@ -409,9 +568,11 @@ main <- function() { message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) # Load weekly harvest probabilities from script 31 (if available) + # Note: Script 31 saves to reports/kpis/field_stats/ (not field_level) message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats", - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_iso_year)) + harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") + harvest_prob_file <- file.path(harvest_prob_dir, + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year)) message(paste(" Looking for:", harvest_prob_file)) imminent_prob_data <- tryCatch({ @@ -854,6 +1015,14 @@ main <- function() { cat(" - Per-field data exported\n") cat(" - Farm-level KPIs calculated\n") cat(" - All outputs in:", reports_dir, "\n\n") + + } else { + # Unknown client type - log warning and exit + warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) + cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "") + cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n") + cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n") + } } if (sys.nframe() == 0) { diff --git a/r_app/80_report_building_utils.R b/r_app/80_report_building_utils.R deleted file mode 100644 index 
0c5db3c..0000000 --- a/r_app/80_report_building_utils.R +++ /dev/null @@ -1,258 +0,0 @@ -# 80_REPORT_BUILDING_UTILS.R -# ============================================================================ -# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT -# -# This file contains reusable functions for: -# - Field analysis summary generation -# - Excel/CSV/RDS export functionality -# - Farm-level KPI aggregation and summary -# - Tile-based KPI extraction (alternative calculation method) -# -# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts -# ============================================================================ - -# ============================================================================ -# SUMMARY GENERATION -# ============================================================================ - -generate_field_analysis_summary <- function(field_df) { - message("Generating summary statistics...") - - total_acreage <- sum(field_df$Acreage, na.rm = TRUE) - - germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE) - tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE) - grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE) - maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE) - unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE) - - harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) - stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) - recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) - growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) - germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == 
"germination_complete"], na.rm = TRUE) - germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) - no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) - - clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) - partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) - no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) - total_fields <- nrow(field_df) - - clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) - partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) - no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) - - summary_df <- data.frame( - Category = c( - "--- PHASE DISTRIBUTION ---", - "Germination", - "Tillering", - "Grand Growth", - "Maturation", - "Unknown phase", - "--- STATUS TRIGGERS ---", - "Harvest ready", - "Stress detected", - "Strong recovery", - "Growth on track", - "Germination complete", - "Germination started", - "No trigger", - "--- CLOUD COVERAGE (FIELDS) ---", - "Clear view", - "Partial coverage", - "No image available", - "--- CLOUD COVERAGE (ACREAGE) ---", - "Clear view", - "Partial coverage", - "No image available", - "--- TOTAL ---", - "Total Acreage" - ), - Acreage = c( - NA, - round(germination_acreage, 2), - round(tillering_acreage, 2), - round(grand_growth_acreage, 2), - round(maturation_acreage, 2), - round(unknown_phase_acreage, 2), - NA, - round(harvest_ready_acreage, 2), - round(stress_acreage, 2), - round(recovery_acreage, 2), - round(growth_on_track_acreage, 2), - round(germination_complete_acreage, 2), - round(germination_started_acreage, 2), - round(no_trigger_acreage, 2), - NA, - paste0(clear_fields, " fields"), - paste0(partial_fields, " fields"), - paste0(no_image_fields, " fields"), - NA, - 
round(clear_acreage, 2), - round(partial_acreage, 2), - round(no_image_acreage, 2), - NA, - round(total_acreage, 2) - ), - stringsAsFactors = FALSE - ) - - return(summary_df) -} - -# ============================================================================ -# EXPORT FUNCTIONS -# ============================================================================ - -export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) { - message("Exporting per-field analysis to Excel, CSV, and RDS...") - - field_df_rounded <- field_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) - - # Handle NULL summary_df - summary_df_rounded <- if (!is.null(summary_df)) { - summary_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) - } else { - NULL - } - - output_subdir <- file.path(reports_dir, "kpis", "field_analysis") - if (!dir.exists(output_subdir)) { - dir.create(output_subdir, recursive = TRUE) - } - - excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") - excel_path <- file.path(output_subdir, excel_filename) - excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) - - # Build sheets list dynamically - sheets <- list( - "Field Data" = field_df_rounded - ) - if (!is.null(summary_df_rounded)) { - sheets[["Summary"]] <- summary_df_rounded - } - - write_xlsx(sheets, excel_path) - message(paste("✓ Field analysis Excel exported to:", excel_path)) - - kpi_data <- list( - field_analysis = field_df_rounded, - field_analysis_summary = summary_df_rounded, - metadata = list( - current_week = current_week, - year = year, - project = project_dir, - created_at = Sys.time() - ) - ) - - rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") - rds_path <- file.path(reports_dir, "kpis", rds_filename) - - saveRDS(kpi_data, rds_path) - message(paste("✓ Field analysis RDS exported to:", rds_path)) - - 
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") - csv_path <- file.path(output_subdir, csv_filename) - write_csv(field_df_rounded, csv_path) - message(paste("✓ Field analysis CSV exported to:", csv_path)) - - return(list(excel = excel_path, rds = rds_path, csv = csv_path)) -} - -# ============================================================================ -# TILE-BASED KPI EXTRACTION (Alternative calculation method) -# ============================================================================ - -# [COMMENTED OUT / UNUSED - kept for reference] -# These functions provide tile-based extraction as an alternative to field_statistics approach -# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R -# Uncomment if parallel processing of tiles is needed in future - -# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) { -# message("Calculating field-level KPI statistics from tiles...") -# -# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) -# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE) -# -# if (length(tile_files) == 0) { -# message("No tiles found for week", week_num, year) -# return(NULL) -# } -# -# message(paste("Processing", length(tile_files), "tiles in parallel...")) -# -# field_kpi_list <- furrr::future_map( -# tile_files, -# ~ process_single_kpi_tile( -# tile_file = ., -# field_boundaries_sf = field_boundaries_sf, -# tile_grid = tile_grid -# ), -# .progress = TRUE, -# .options = furrr::furrr_options(seed = TRUE) -# ) -# -# field_kpi_stats <- dplyr::bind_rows(field_kpi_list) -# -# if (nrow(field_kpi_stats) == 0) { -# message(" No KPI data extracted from tiles") -# return(NULL) -# } -# -# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields")) -# return(field_kpi_stats) -# } - -# process_single_kpi_tile <- function(tile_file, 
field_boundaries_sf, tile_grid) { -# # Helper function for calculate_field_kpis_from_tiles -# tryCatch({ -# tile_basename <- basename(tile_file) -# tile_raster <- terra::rast(tile_file) -# ci_band <- tile_raster[[1]] -# -# field_bbox <- sf::st_bbox(field_boundaries_sf) -# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out") -# -# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE) -# -# tile_results <- data.frame() -# tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename)) -# -# for (field_idx in seq_len(nrow(field_boundaries_sf))) { -# field_id <- field_boundaries_sf$field[field_idx] -# mean_ci <- extracted_vals[field_idx, 2] -# -# if (is.na(mean_ci)) { -# next -# } -# -# tile_results <- rbind(tile_results, data.frame( -# field = field_id, -# tile_id = tile_id_match, -# tile_file = tile_basename, -# mean_ci = round(mean_ci, 4), -# stringsAsFactors = FALSE -# )) -# } -# -# return(tile_results) -# -# }, error = function(e) { -# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message)) -# return(data.frame()) -# }) -# } - -# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, -# harvesting_data, cumulative_CI_vals_dir, -# weekly_CI_mosaic, reports_dir, current_week, year, -# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") { -# # Farm-level KPI calculation using tile-based extraction (alternative approach) -# # [Implementation kept as reference for alternative calculation method] -# } diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R new file mode 100644 index 0000000..c1d710a --- /dev/null +++ b/r_app/80_utils_agronomic_support.R @@ -0,0 +1,666 @@ +# 80_UTILS_AGRONOMIC_SUPPORT.R +# ============================================================================ +# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support) +# +# Contains all 6 AURA KPI calculation 
functions and helpers: +# - Field uniformity KPI (CV-based, spatial autocorrelation) +# - Area change KPI (week-over-week CI changes) +# - TCH forecasted KPI (tonnage projections from harvest data) +# - Growth decline KPI (trend analysis) +# - Weed presence KPI (field fragmentation detection) +# - Gap filling KPI (interpolation quality) +# - KPI reporting (summary tables, field details, text interpretation) +# - KPI export (Excel, RDS, data export) +# +# Orchestrator: calculate_all_kpis() +# Dependencies: 00_common_utils.R (safe_log), sourced from common +# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support") +# ============================================================================ + +library(terra) +library(sf) +library(dplyr) +library(tidyr) +library(readxl) +library(writexl) +library(spdep) +library(caret) +library(CAST) + +# ============================================================================ +# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R) +# ============================================================================ +# The following helper functions have been moved to 80_utils_common.R: +# - calculate_cv() +# - calculate_change_percentages() +# - calculate_spatial_autocorrelation() +# - extract_ci_values() +# - calculate_week_numbers() +# - load_field_ci_raster() +# - load_weekly_ci_mosaic() +# - prepare_predictions() +# +# These are now sourced from common utils and shared by all client types. 
+# ============================================================================ + +#' Prepare harvest predictions and ensure proper alignment with field data +prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") { + if (is.null(harvest_model) || is.null(field_data)) { + return(NULL) + } + + tryCatch({ + scenario_factor <- switch(scenario, + "pessimistic" = 0.85, + "realistic" = 1.0, + "optimistic" = 1.15, + 1.0) + + predictions <- field_data %>% + mutate(tch_forecasted = field_data$mean_ci * scenario_factor) + + return(predictions) + }, error = function(e) { + message(paste("Error preparing predictions:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# AURA KPI CALCULATION FUNCTIONS (6 KPIS) +# ============================================================================ + +#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation +#' +#' Measures how uniform crop development is across the field. 
+#' Low CV + high positive Moran's I = excellent uniformity +#' +#' @param ci_pixels_by_field List of CI pixel arrays for each field +#' @param field_boundaries_sf SF object with field geometries +#' @param ci_band Raster band with CI values +#' +#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation +calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) { + result <- data.frame( + field_idx = integer(), + cv_value = numeric(), + morans_i = numeric(), + uniformity_score = numeric(), + interpretation = character(), + stringsAsFactors = FALSE + ) + + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + ci_pixels <- ci_pixels_by_field[[field_idx]] + + if (is.null(ci_pixels) || length(ci_pixels) == 0) { + result <- rbind(result, data.frame( + field_idx = field_idx, + cv_value = NA_real_, + morans_i = NA_real_, + uniformity_score = NA_real_, + interpretation = "No data", + stringsAsFactors = FALSE + )) + next + } + + cv_val <- calculate_cv(ci_pixels) + + morans_i <- NA_real_ + if (!is.null(ci_band)) { + morans_result <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ]) + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + } + + # Normalize CV (0-1 scale, invert so lower CV = higher score) + cv_normalized <- min(cv_val / 0.3, 1) # 0.3 = threshold for CV + cv_score <- 1 - cv_normalized + + # Normalize Moran's I (-1 to 1 scale, shift to 0-1) + morans_normalized <- if (!is.na(morans_i)) { + (morans_i + 1) / 2 + } else { + 0.5 + } + + uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized + + # Interpretation + if (is.na(cv_val)) { + interpretation <- "No data" + } else if (cv_val < 0.08) { + interpretation <- "Excellent uniformity" + } else if (cv_val < 0.15) { + interpretation <- "Good uniformity" + } else if (cv_val < 0.25) { + interpretation <- "Acceptable uniformity" + } else if (cv_val < 
0.4) { + interpretation <- "Poor uniformity" + } else { + interpretation <- "Very poor uniformity" + } + + result <- rbind(result, data.frame( + field_idx = field_idx, + cv_value = cv_val, + morans_i = morans_i, + uniformity_score = round(uniformity_score, 3), + interpretation = interpretation, + stringsAsFactors = FALSE + )) + } + + return(result) +} + +#' KPI 2: Calculate area change metric (week-over-week CI changes) +#' +#' Tracks the percentage change in CI between current and previous week +#' +#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci) +#' @param previous_stats Previous week field statistics +#' +#' @return Data frame with field-level CI changes +calculate_area_change_kpi <- function(current_stats, previous_stats) { + result <- calculate_change_percentages(current_stats, previous_stats) + + # Add interpretation + result$interpretation <- NA_character_ + + for (i in seq_len(nrow(result))) { + change <- result$mean_ci_pct_change[i] + + if (is.na(change)) { + result$interpretation[i] <- "No previous data" + } else if (change > 15) { + result$interpretation[i] <- "Rapid growth" + } else if (change > 5) { + result$interpretation[i] <- "Positive growth" + } else if (change > -5) { + result$interpretation[i] <- "Stable" + } else if (change > -15) { + result$interpretation[i] <- "Declining" + } else { + result$interpretation[i] <- "Rapid decline" + } + } + + return(result) +} + +#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare) +#' +#' Projects final harvest tonnage based on CI growth trajectory +#' +#' @param field_statistics Current field statistics +#' @param harvesting_data Historical harvest data (with yield observations) +#' @param field_boundaries_sf Field geometries +#' +#' @return Data frame with field-level TCH forecasts +calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) { + result <- data.frame( + field_idx = 
field_statistics$field_idx,
    mean_ci = field_statistics$mean_ci,
    tch_forecasted = NA_real_,
    tch_lower_bound = NA_real_,
    tch_upper_bound = NA_real_,
    confidence = NA_character_,
    stringsAsFactors = FALSE
  )

  # Base TCH model: TCH = 50 + (CI * 10)
  # This is a simplified model; production use should include more variables

  for (i in seq_len(nrow(result))) {
    if (is.na(result$mean_ci[i])) {
      result$confidence[i] <- "No data"
      next
    }

    ci_val <- result$mean_ci[i]

    # Simple linear model
    tch_est <- 50 + (ci_val * 10)

    # Confidence interval: fixed +/-15% band around the point estimate
    tch_lower <- tch_est * 0.85
    tch_upper <- tch_est * 1.15

    result$tch_forecasted[i] <- round(tch_est, 2)
    result$tch_lower_bound[i] <- round(tch_lower, 2)
    result$tch_upper_bound[i] <- round(tch_upper, 2)
    result$confidence[i] <- "Medium"
  }

  return(result)
}

#' KPI 4: Calculate growth decline indicator
#'
#' Identifies fields with negative growth trajectory by fitting a linear
#' trend (stats::lm) through each field's weekly CI series.
#'
#' @param ci_values_list List of CI values for each field (multiple weeks;
#'   element i is the weekly CI vector for field i, NAs allowed)
#'
#' @return Data frame with one row per field: four_week_trend (slope),
#'   trend_interpretation, decline_severity
calculate_growth_decline_kpi <- function(ci_values_list) {
  result <- data.frame(
    field_idx = seq_len(length(ci_values_list)),
    four_week_trend = NA_real_,
    trend_interpretation = NA_character_,
    decline_severity = NA_character_,
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_len(length(ci_values_list))) {
    ci_vals <- ci_values_list[[field_idx]]

    if (is.null(ci_vals) || length(ci_vals) < 2) {
      result$trend_interpretation[field_idx] <- "Insufficient data"
      next
    }

    # FIX: keep the original week positions of the non-NA observations so the
    # regression x-axis reflects real time gaps. Previously NAs were dropped
    # and weeks renumbered 1..k, which inflated the slope whenever the series
    # had missing weeks. With no NAs this is identical to the old behavior.
    obs_weeks <- which(!is.na(ci_vals))
    ci_vals <- ci_vals[obs_weeks]
    if (length(ci_vals) < 2) {
      result$trend_interpretation[field_idx] <- "Insufficient data"
      next
    }

    # Calculate linear trend over the observed weeks
    weeks <- obs_weeks
    lm_fit <- lm(ci_vals ~ weeks)
    slope <- coef(lm_fit)["weeks"]

    result$four_week_trend[field_idx] <- round(as.numeric(slope), 3)

    # NOTE(review): these thresholds are local magic numbers and differ from
    # the FOUR_WEEK_TREND_* constants in 80_utils_common.R (-0.3 vs -0.5) —
    # confirm which set is authoritative before unifying.
    if (slope > 0.1) {
      result$trend_interpretation[field_idx] <- "Strong growth"
      result$decline_severity[field_idx] <- "None"
    } else if (slope > 0) {
      result$trend_interpretation[field_idx] <- "Weak growth"
      result$decline_severity[field_idx] <- "None"
    } else if (slope > -0.1) {
      result$trend_interpretation[field_idx] <- "Slight decline"
      result$decline_severity[field_idx] <- "Low"
    } else if (slope > -0.3) {
      result$trend_interpretation[field_idx] <- "Moderate decline"
      result$decline_severity[field_idx] <- "Medium"
    } else {
      result$trend_interpretation[field_idx] <- "Strong decline"
      result$decline_severity[field_idx] <- "High"
    }
  }

  return(result)
}

#' KPI 5: Calculate weed presence indicator
#'
#' Detects field fragmentation/patchiness (potential weed/pest pressure)
#' from the per-pixel CI distribution within each field.
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#'
#' @return Data frame with fragmentation indicators per field
calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
  result <- data.frame(
    field_idx = seq_len(length(ci_pixels_by_field)),
    cv_value = NA_real_,
    low_ci_percent = NA_real_,
    fragmentation_index = NA_real_,
    weed_pressure_risk = NA_character_,
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_len(length(ci_pixels_by_field))) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]

    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
      result$weed_pressure_risk[field_idx] <- "No data"
      next
    }

    ci_pixels <- ci_pixels[!is.na(ci_pixels)]
    if (length(ci_pixels) == 0) {
      result$weed_pressure_risk[field_idx] <- "No data"
      next
    }

    # Fragmentation = CV scaled by the share of low-CI (< 1.5) pixels
    cv_val <- calculate_cv(ci_pixels)
    low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100
    fragmentation <- cv_val * low_ci_pct / 100

    result$cv_value[field_idx] <- cv_val
    result$low_ci_percent[field_idx] <- round(low_ci_pct, 2)
    result$fragmentation_index[field_idx] <- round(fragmentation, 3)

    if (is.na(fragmentation)) {
      result$weed_pressure_risk[field_idx] <- "No data"
    } else if (fragmentation > 0.15) {
      result$weed_pressure_risk[field_idx] <- "High"
    } else if (fragmentation > 0.08) {
      result$weed_pressure_risk[field_idx] <- "Medium"
    } else if (fragmentation > 0.04) {
      result$weed_pressure_risk[field_idx] <- "Low"
    } else {
      result$weed_pressure_risk[field_idx] <- "Minimal"
    }
  }

  return(result)
}

#' KPI 6: Calculate gap filling quality (data interpolation success)
#'
#' Measures how well cloud/missing data was interpolated during growth model
#'
#' @param ci_rds_path Path to combined CI RDS file (or a directory containing
#'   an "All_pivots*.rds" cumulative file); NULL-safe
#'
#' @return Data frame with gap-filling quality metrics, or NULL when the
#'   input path is missing/unreadable
calculate_gap_filling_kpi <- function(ci_rds_path) {
  # If ci_rds_path is NULL or not a valid path, return placeholder
  if (is.null(ci_rds_path) || !is.character(ci_rds_path) || length(ci_rds_path) == 0) {
    return(NULL)
  }

  # If ci_rds_path is a directory, find the cumulative CI file
  if (dir.exists(ci_rds_path)) {
    ci_files <- list.files(ci_rds_path, pattern = "^All_pivots.*\\.rds$", full.names = TRUE)
    if (length(ci_files) == 0) {
      return(NULL)
    }
    ci_rds_path <- ci_files[1]
  }

  if (!file.exists(ci_rds_path)) {
    return(NULL)
  }

  tryCatch({
    ci_data <- readRDS(ci_rds_path)

    # ci_data should be a wide matrix: fields x weeks
    # NA values = missing data before interpolation
    # (Gap filling is done during growth model stage)

    result <- data.frame(
      field_idx = seq_len(nrow(ci_data)),
      na_percent_pre_interpolation = NA_real_,
      # TODO(review): na_percent_post_interpolation is declared but never
      # populated below — confirm whether post-interpolation data exists
      # here or the column should be dropped.
      na_percent_post_interpolation = NA_real_,
      gap_filling_success = NA_character_,
      stringsAsFactors = FALSE
    )

    for (field_idx in seq_len(nrow(ci_data))) {
      na_count <- sum(is.na(ci_data[field_idx, ]))
      na_pct <- na_count / ncol(ci_data) * 100

      if (na_pct == 0) {
        result$gap_filling_success[field_idx] <- "No gaps (100% data)"
      } else if (na_pct < 10) {
        result$gap_filling_success[field_idx] <- "Excellent"
      } else if (na_pct < 25) {
        result$gap_filling_success[field_idx] <- "Good"
      } else if (na_pct < 40) {
        result$gap_filling_success[field_idx] <- "Fair"
      } else {
        result$gap_filling_success[field_idx] <- "Poor"
      }

      result$na_percent_pre_interpolation[field_idx] <- round(na_pct, 2)
    }

    return(result)
  }, error = function(e) {
    message(paste("Error calculating gap filling KPI:", e$message))
    return(NULL)
  })
}

# ============================================================================
# KPI ORCHESTRATOR AND REPORTING
# ============================================================================

#' Create summary tables for all 6 KPIs
#'
#' @param all_kpis List containing results from all 6 KPI functions
#'
#' @return List of summary data frames ready for reporting (gap_filling is
#'   NULL when that KPI could not be computed)
create_summary_tables <- function(all_kpis) {
  kpi_summary <- list(
    uniformity = all_kpis$uniformity %>%
      select(field_idx, cv_value, morans_i, uniformity_score, interpretation),

    area_change = all_kpis$area_change %>%
      select(field_idx, mean_ci_pct_change, interpretation),

    tch_forecast = all_kpis$tch_forecasted %>%
      select(field_idx, mean_ci, tch_forecasted, tch_lower_bound, tch_upper_bound, confidence),

    growth_decline = all_kpis$growth_decline %>%
      select(field_idx, four_week_trend, trend_interpretation, decline_severity),

    weed_pressure = all_kpis$weed_presence %>%
      select(field_idx, fragmentation_index, weed_pressure_risk),

    gap_filling = if (!is.null(all_kpis$gap_filling)) {
      all_kpis$gap_filling %>%
        select(field_idx, na_percent_pre_interpolation, gap_filling_success)
    } else {
      NULL
    }
  )

  return(kpi_summary)
}

#' Create detailed field-by-field KPI report
#'
#' @param field_df Data frame with field identifiers and acreage
#' @param all_kpis List with all KPI results
#' @param field_boundaries_sf SF object with field boundaries
#'
#' @return Data frame with one row per field, all KPI columns
create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) {
  result <-
field_df %>%
    left_join(
      all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$area_change %>% select(field_idx, mean_ci_pct_change),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$growth_decline %>% select(field_idx, decline_severity),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk),
      by = c("field_idx")
    )

  return(result)
}

#' Generate KPI text interpretation for inclusion in Word report
#'
#' @param all_kpis List with all KPI results
#'
#' @return Character string with formatted KPI summary text
create_field_kpi_text <- function(all_kpis) {
  text_parts <- c(
    "## AURA KPI ANALYSIS SUMMARY\n",
    "### Field Uniformity\n",
    paste(all_kpis$uniformity$interpretation, collapse = "; "), "\n",
    "### Growth Trends\n",
    paste(all_kpis$growth_decline$trend_interpretation, collapse = "; "), "\n",
    "### Weed/Pest Pressure\n",
    paste(all_kpis$weed_presence$weed_pressure_risk, collapse = "; "), "\n"
  )

  return(paste(text_parts, collapse = ""))
}

#' Export detailed KPI data to Excel/RDS
#'
#' @param all_kpis List with all KPI results
#' @param kpi_summary List with summary tables (gap_filling may be NULL)
#' @param output_dir Directory for output files (created if absent)
#' @param week Week number
#' @param year Year
#'
#' @return List of output file paths (excel, rds)
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
  # Ensure output directory exists
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  # Export all KPI tables to a single Excel file
  excel_file <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".xlsx")

  sheets <- list(
    "Uniformity" = as.data.frame(kpi_summary$uniformity),
    "Area_Change" = as.data.frame(kpi_summary$area_change),
    "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast),
    "Growth_Decline" = as.data.frame(kpi_summary$growth_decline),
    "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure)
  )

  # FIX: gap_filling is legitimately NULL when the KPI could not be computed;
  # as.data.frame(NULL) yields a 0-column frame and would produce a
  # meaningless empty sheet — only add the sheet when data exists.
  if (!is.null(kpi_summary$gap_filling)) {
    sheets[["Gap_Filling"]] <- as.data.frame(kpi_summary$gap_filling)
  }

  write_xlsx(sheets, excel_file)
  message(paste("✓ AURA KPI data exported to:", excel_file))

  # Also export to RDS for programmatic access
  rds_file <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".rds")
  saveRDS(all_kpis, rds_file)
  message(paste("✓ AURA KPI RDS exported to:", rds_file))

  return(list(excel = excel_file, rds = rds_file))
}

# ============================================================================
# ORCHESTRATOR FUNCTION
# ============================================================================

#' Calculate all 6 AURA KPIs
#'
#' Main entry point for AURA KPI calculation.
#' This function orchestrates the 6 KPI calculations and returns all results.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param current_mosaic_dir Directory containing current week's mosaic
#' @param previous_mosaic_dir Directory containing previous week's mosaic (optional)
#' @param ci_rds_path Path to combined CI RDS file
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for KPI exports
#'
#' @return List with results from all 6 KPI functions
#'
#' @details
#' This function:
#' 1. Loads current week mosaic and extracts field statistics
#' 2. (Optionally) loads previous week mosaic for comparison metrics
#' 3. Calculates all 6 AURA KPIs
#' 4. Creates summary tables
#' 5. Exports results to Excel/RDS
#'
calculate_all_kpis <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  current_mosaic_dir,
  previous_mosaic_dir = NULL,
  ci_rds_path = NULL,
  harvesting_data = NULL,
  output_dir = NULL
) {

  message("\n============ AURA KPI CALCULATION (6 KPIs) ============")

  # Load current week mosaic
  message("Loading current week mosaic...")
  # NOTE(review): arguments here are (week, year, dir) while the cane-supply
  # orchestrator calls load_weekly_ci_mosaic(dir, week, year) — one of the
  # two call sites must be wrong; confirm the helper's signature.
  current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)

  if (is.null(current_mosaic)) {
    stop("Could not load current week mosaic")
  }

  # Extract field statistics
  message("Extracting field statistics from current mosaic...")
  current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf)

  # Load previous week mosaic (if available)
  previous_stats <- NULL
  if (!is.null(previous_mosaic_dir)) {
    target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
    message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
    previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)

    if (!is.null(previous_mosaic)) {
      previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
    } else {
      message("Previous week mosaic not available - skipping area change KPI")
    }
  }

  # Calculate 6 KPIs
  message("\nCalculating KPI 1: Field Uniformity...")
  uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic)

  message("Calculating KPI 2: Area Change...")
  if (!is.null(previous_stats)) {
    area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats)
  } else {
    area_change_kpi <- data.frame(
      field_idx = seq_len(nrow(field_boundaries_sf)),
      mean_ci_pct_change = NA_real_,
      interpretation = rep("No previous data", nrow(field_boundaries_sf))
    )
  }

  message("Calculating KPI 3: TCH Forecasted...")
  tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf)

  message("Calculating KPI 4: Growth Decline...")
  # NOTE(review): wrapping ci_pixels_by_field in list() yields a single
  # pseudo-field of pixel vectors, not per-field weekly series — acknowledged
  # placeholder until historical data is wired in.
  growth_decline_kpi <- calculate_growth_decline_kpi(
    list(ci_pixels_by_field) # Would need historical data for real trend
  )

  message("Calculating KPI 5: Weed Presence...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)

  message("Calculating KPI 6: Gap Filling...")
  gap_filling_kpi <- calculate_gap_filling_kpi(ci_rds_path)

  # Compile results
  all_kpis <- list(
    uniformity = uniformity_kpi,
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    weed_presence = weed_kpi,
    gap_filling = gap_filling_kpi
  )

  # Create summary tables
  kpi_summary <- create_summary_tables(all_kpis)

  # Export
  export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year)

  message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year, "\n"))

  return(all_kpis)
}
diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R
new file mode 100644
index 0000000..df6e319
--- /dev/null
+++ b/r_app/80_utils_cane_supply.R
@@ -0,0 +1,210 @@
# 80_UTILS_CANE_SUPPLY.R
# ============================================================================
# CANE SUPPLY CLIENT-SPECIFIC UTILITIES (SCRIPT 80 - CLIENT TYPE: cane_supply)
#
# Contains ANGATA and other cane supply-specific KPI and reporting functions.
#
# Currently, CANE_SUPPLY clients use the common utilities from 80_utils_common.R:
# - Weekly statistics (calculate_field_statistics, calculate_kpi_trends)
# - Field analysis reporting (generate_field_analysis_summary)
# - Excel export (export_field_analysis_excel)
#
# This file is structured to accommodate future ANGATA-specific functionality such as:
# - Custom yield models
# - Harvest readiness criteria
# - Supply chain integration hooks
# - ANGATA-specific alerting and messaging
#
# Orchestrator: (Placeholder - uses common functions)
# Dependencies: 00_common_utils.R, 80_utils_common.R
# Used by: 80_calculate_kpis.R (when client_type == "cane_supply")
# ============================================================================

library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)

# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================

#' Placeholder: ANGATA harvest readiness assessment
#'
#' Interim CI-threshold heuristic. A future version is expected to fold in:
#' - Maturation phase detection (CI threshold-based)
#' - Field age tracking (days since planting)
#' - Weather-based ripeness indicators (if available)
#' - Historical yield correlations
#'
#' @param field_ci CI values for the field
#' @param field_age_days Days since planting (currently unused placeholder)
#'
#' @return Character string with harvest readiness assessment
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
  # Guard: no usable CI data at all
  if (is.null(field_ci) || all(is.na(field_ci))) {
    return("No data available")
  }

  # Placeholder logic: classify on mean CI alone. The production version
  # would also require age > 350 days and a plateauing weekly growth rate.
  avg_ci <- mean(field_ci, na.rm = TRUE)

  if (avg_ci > 3.5) {
    return("Ready for harvest")
  }
  if (avg_ci > 2.5) {
    return("Approaching harvest readiness")
  }
  "Not ready - continue monitoring"
}

#' Placeholder: ANGATA supply chain status flags
#'
#' Future implementation will append supply chain-specific indicators:
#' schedule_ready (bool), harvest_window_days (numeric),
#' transportation_priority (char), quality_flags (char).
#'
#' @param field_analysis Data frame with field analysis results
#'
#' @return Data frame with supply chain status columns (currently unchanged)
assess_supply_chain_status <- function(field_analysis) {
  # Pass-through for now: no ANGATA-specific columns are added yet.
  return(field_analysis)
}

# ============================================================================
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
# ============================================================================

#' Orchestrate ANGATA weekly field analysis and reporting
#'
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
#' Currently uses common utilities; future versions will add client-specific logic.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param mosaic_dir Directory containing weekly mosaics
#' @param field_boundaries_path Path to field GeoJSON
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for exports
#' @param data_dir Base data directory
#'
#' @return List with field analysis results (field_analysis, summary,
#'   exports), or NULL when the weekly mosaic or analysis is unavailable
#'
#' @details
#' Steps: load weekly mosaic -> extract field statistics -> compute trends
#' via the common utilities -> summarise -> export to Excel/CSV/RDS.
#' (Future) ANGATA-specific assessments will be applied after step 4.
calculate_field_analysis_cane_supply <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  mosaic_dir,
  field_boundaries_path = NULL,
  harvesting_data = NULL,
  output_dir = file.path(PROJECT_DIR, "output"),
  data_dir = NULL
) {

  message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")

  # Current-week mosaic is mandatory; bail out with a warning if absent
  message("Loading current week mosaic...")
  week_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)
  if (is.null(week_mosaic)) {
    warning(paste("Could not load current week mosaic for week", current_week, current_year))
    return(NULL)
  }

  message("Extracting field statistics from current mosaic...")
  stats_now <- extract_field_statistics_from_ci(week_mosaic, field_boundaries_sf)

  # Previous-week statistics are optional inputs for the trend columns
  message("Loading historical data for trends...")
  prev_ref <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
  stats_prev <- NULL
  prev_mosaic <- load_weekly_ci_mosaic(mosaic_dir, prev_ref$week, prev_ref$year)
  if (!is.null(prev_mosaic)) {
    stats_prev <- extract_field_statistics_from_ci(prev_mosaic, field_boundaries_sf)
  }

  message("Calculating field trends...")
  ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")

  # NOTE(review): calculate_field_statistics was removed from this repo's
  # 80_utils_common.R in the same change — confirm the replacement definition
  # accepts these named arguments.
  field_analysis <- calculate_field_statistics(
    field_stats = stats_now,
    previous_stats = stats_prev,
    week_num = current_week,
    year = current_year,
    ci_rds_path = ci_rds_path,
    field_boundaries_sf = field_boundaries_sf,
    harvesting_data = harvesting_data
  )

  if (is.null(field_analysis)) {
    message("Could not generate field analysis")
    return(NULL)
  }

  message("Generating summary statistics...")
  summary_df <- generate_field_analysis_summary(field_analysis)

  message("Exporting field analysis...")
  export_paths <- export_field_analysis_excel(
    field_analysis,
    summary_df,
    PROJECT_DIR,
    current_week,
    current_year,
    output_dir
  )

  message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))

  list(
    field_analysis = field_analysis,
    summary = summary_df,
    exports = export_paths
  )
}

# ============================================================================
# FUTURE EXTENSION POINTS
# ============================================================================

# Placeholder for ANGATA-specific utilities that may be added in future:
# - Custom yield models based on ANGATA historical data
# - Field condition thresholds specific to ANGATA growing practices
# - Integration with ANGATA harvest scheduling system
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
# - Cost/benefit analysis for ANGATA operational decisions

# These functions can be added here as ANGATA requirements evolve.
diff --git a/r_app/80_weekly_stats_utils.R b/r_app/80_utils_common.R similarity index 51% rename from r_app/80_weekly_stats_utils.R rename to r_app/80_utils_common.R index a4b460e..3c85f8a 100644 --- a/r_app/80_weekly_stats_utils.R +++ b/r_app/80_utils_common.R @@ -1,22 +1,92 @@ -# 80_WEEKLY_STATS_UTILS.R +# 80_UTILS_COMMON.R # ============================================================================ -# UTILITY FUNCTIONS FOR WEEKLY STATISTICS CALCULATION +# SHARED UTILITY FUNCTIONS FOR ALL CLIENT TYPES (SCRIPT 80) # -# This file contains reusable functions for: -# - Tile grid management -# - Tile loading and merging -# - Field-level statistics calculation from CI rasters -# - Weekly stats caching (RDS/CSV export/import) -# - KPI trend calculations -# - Historical data loading and auto-generation from mosaics +# Contains helper and infrastructure functions used by both AURA and ANGATA workflows: +# - Statistical categorization and calculations +# - Tile operations and data loading +# - Field statistics extraction +# - Week/year calculations for consistent date handling +# - Excel/CSV/RDS export utilities # -# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts +# Used by: 80_calculate_kpis.R, all client-specific utils files # ============================================================================ +# ============================================================================ +# CONSTANTS (from 80_calculate_kpis.R) +# ============================================================================ + +# Four-week trend thresholds +FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 +FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 +FOUR_WEEK_TREND_GROWTH_MAX <- 0.5 +FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1 +FOUR_WEEK_TREND_DECLINE_MAX <- -0.1 +FOUR_WEEK_TREND_DECLINE_MIN <- -0.5 +FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5 + +# CV Trend thresholds (8-week slope interpretation) +CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 +CV_SLOPE_IMPROVEMENT_MIN <- -0.02 
+CV_SLOPE_IMPROVEMENT_MAX <- -0.01 +CV_SLOPE_HOMOGENOUS_MIN <- -0.01 +CV_SLOPE_HOMOGENOUS_MAX <- 0.01 +CV_SLOPE_PATCHINESS_MIN <- 0.01 +CV_SLOPE_PATCHINESS_MAX <- 0.02 +CV_SLOPE_SEVERE_MIN <- 0.02 + +# Percentile calculations +CI_PERCENTILE_LOW <- 0.10 +CI_PERCENTILE_HIGH <- 0.90 + +# Phase definitions (used by get_phase_by_age) +PHASE_DEFINITIONS <- data.frame( + phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), + age_start = c(0, 4, 17, 39), + age_end = c(6, 16, 39, 200), + stringsAsFactors = FALSE +) + +# ============================================================================ +# WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts) +# ============================================================================ + +#' Calculate week and year for a given lookback offset +#' This function handles ISO 8601 week numbering with proper year wrapping +#' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025) +#' +#' @param current_week ISO week number (1-53) +#' @param current_year ISO week year (from format(..., "%G")) +#' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.) +#' +#' @return List with: week (ISO week number), year (ISO week year) +#' +#' @details +#' This is the authoritative week/year calculation function. 
+#' Used by: +#' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback +#' - Script 80 main - to calculate previous week with year wrapping +#' - Any other script needing to walk backwards through weeks +#' +#' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025) +calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) { + target_week <- current_week - offset_weeks + target_year <- current_year + + # Handle wrapping: when going back from week 1, wrap to week 52 of previous year + while (target_week < 1) { + target_week <- target_week + 52 + target_year <- target_year - 1 + } + + return(list(week = target_week, year = target_year)) +} + # ============================================================================ # TILE-AWARE HELPER FUNCTIONS # ============================================================================ +#' Get tile IDs that intersect with a field geometry get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { if (inherits(field_geom, "sf")) { field_bbox <- sf::st_bbox(field_geom) @@ -44,6 +114,7 @@ get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { return(as.numeric(intersecting_tiles)) } +#' Load and merge tiles for a specific field load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { if (length(tile_ids) == 0) { return(NULL) @@ -83,6 +154,7 @@ load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_di } } +#' Build tile grid from available tile files build_tile_grid <- function(mosaic_dir, week_num, year) { # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/) detected_grid_size <- NA @@ -149,6 +221,7 @@ build_tile_grid <- function(mosaic_dir, week_num, year) { # STATISTICAL CATEGORIZATION FUNCTIONS # ============================================================================ +#' Categorize four-week CI trend categorize_four_week_trend <- function(ci_values_list) { if 
(is.null(ci_values_list) || length(ci_values_list) < 2) { return(NA_character_) @@ -179,6 +252,7 @@ categorize_four_week_trend <- function(ci_values_list) { } } +#' Round cloud coverage to interval categories round_cloud_to_intervals <- function(cloud_pct_clear) { if (is.na(cloud_pct_clear)) { return(NA_character_) @@ -197,6 +271,7 @@ round_cloud_to_intervals <- function(cloud_pct_clear) { return("95-100%") } +#' Get CI percentile range (10th to 90th) get_ci_percentiles <- function(ci_values) { if (is.null(ci_values) || length(ci_values) == 0) { return(NA_character_) @@ -213,6 +288,7 @@ get_ci_percentiles <- function(ci_values) { return(sprintf("%.1f-%.1f", p10, p90)) } +#' Calculate short-term CV trend (current week vs previous week) calculate_cv_trend <- function(cv_current, cv_previous) { if (is.na(cv_current) || is.na(cv_previous)) { return(NA_real_) @@ -220,10 +296,8 @@ calculate_cv_trend <- function(cv_current, cv_previous) { return(round(cv_current - cv_previous, 4)) } +#' Calculate four-week CI trend calculate_four_week_trend <- function(mean_ci_values) { - #' Calculate four-week CI trend from available weeks - #' Uses whatever weeks are available (1-4 weeks) to estimate trend - if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { return(NA_real_) } @@ -238,9 +312,8 @@ calculate_four_week_trend <- function(mean_ci_values) { return(round(trend, 2)) } +#' Categorize CV slope (8-week regression) into field uniformity interpretation categorize_cv_slope <- function(slope) { - #' Categorize CV slope (8-week regression) into field uniformity interpretation - if (is.na(slope)) { return(NA_character_) } @@ -258,9 +331,8 @@ categorize_cv_slope <- function(slope) { } } +#' Calculate 8-week CV trend via linear regression slope calculate_cv_trend_long_term <- function(cv_values) { - #' Calculate 8-week CV trend via linear regression slope - if (is.null(cv_values) || length(cv_values) == 0) { return(NA_real_) } @@ -286,6 +358,7 @@ calculate_cv_trend_long_term <- 
function(cv_values) { # HELPER FUNCTIONS # ============================================================================ +#' Get crop phase by age in weeks get_phase_by_age <- function(age_weeks) { if (is.na(age_weeks)) return(NA_character_) for (i in seq_len(nrow(PHASE_DEFINITIONS))) { @@ -297,6 +370,7 @@ get_phase_by_age <- function(age_weeks) { return("Unknown") } +#' Get status trigger based on CI values and field age get_status_trigger <- function(ci_values, ci_change, age_weeks) { if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) @@ -339,10 +413,8 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) { return(NA_character_) } +#' Extract planting dates from harvesting data extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { - # Extract planting dates from harvest.xlsx (season_start column) - # Returns: data.frame with columns (field_id, planting_date) - if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { message("Warning: No harvesting data available - planting dates will be NA.") if (!is.null(field_boundaries_sf)) { @@ -373,513 +445,11 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) } # ============================================================================ -# MODULAR STATISTICS CALCULATION -# ============================================================================ - -calculate_field_statistics <- function(field_boundaries_sf, week_num, year, - mosaic_dir, report_date = Sys.Date()) { - - message(paste("Calculating statistics for all fields - Week", week_num, year)) - - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) - } - - message(paste(" Found", length(tile_files), "tiles for week", week_num)) - - results_list <- list() - 
fields_processed <- 0 - - for (tile_idx in seq_along(tile_files)) { - tile_file <- tile_files[tile_idx] - - tryCatch({ - current_rast <- terra::rast(tile_file) - ci_band <- current_rast[["CI"]] - - if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { - message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) - return(NULL) - } - - extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) - unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) - - for (field_poly_idx in unique_field_ids) { - field_id <- field_boundaries_sf$field[field_poly_idx] - ci_vals <- extracted$CI[extracted$ID == field_poly_idx] - ci_vals <- ci_vals[!is.na(ci_vals)] - - if (length(ci_vals) == 0) { - next - } - - mean_ci <- mean(ci_vals, na.rm = TRUE) - ci_std <- sd(ci_vals, na.rm = TRUE) - cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ - range_min <- min(ci_vals, na.rm = TRUE) - range_max <- max(ci_vals, na.rm = TRUE) - range_str <- sprintf("%.1f-%.1f", range_min, range_max) - ci_percentiles_str <- get_ci_percentiles(ci_vals) - - # Count pixels with CI >= 2 (germination threshold) - GERMINATION_CI_THRESHOLD <- 2.0 - num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE) - num_pixels_total <- length(ci_vals) - pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 - - field_rows <- extracted[extracted$ID == field_poly_idx, ] - num_total <- nrow(field_rows) - num_data <- sum(!is.na(field_rows$CI)) - pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 - cloud_cat <- if (num_data == 0) "No image available" - else if (pct_clear >= 95) "Clear view" - else "Partial coverage" - - # Age_week and Phase are now calculated in main script using actual planting dates - # Germination_progress is calculated in main script after Age_week is known - - existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) - - if (length(existing_idx) > 
0) { - next - } - - results_list[[length(results_list) + 1]] <- data.frame( - Field_id = field_id, - Mean_CI = round(mean_ci, 2), - CV = round(cv * 100, 2), - CI_range = range_str, - CI_Percentiles = ci_percentiles_str, - Pct_pixels_CI_gte_2 = pct_pixels_gte_2, - Cloud_pct_clear = pct_clear, - Cloud_category = cloud_cat, - stringsAsFactors = FALSE - ) - - fields_processed <- fields_processed + 1 - } - - message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) - - }, error = function(e) { - message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) - }) - } - - if (length(results_list) == 0) { - stop(paste("No fields processed successfully for week", week_num)) - } - - stats_df <- dplyr::bind_rows(results_list) - message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) - - return(stats_df) -} - -# ============================================================================ -# CALCULATE KPI TRENDS -# ============================================================================ - -calculate_kpi_trends <- function(current_stats, prev_stats = NULL, - project_dir = NULL, reports_dir = NULL, - current_week = NULL, year = NULL) { - - message("Calculating KPI trends from current and previous week data") - - current_stats$Weekly_ci_change <- NA_real_ - current_stats$CV_Trend_Short_Term <- NA_real_ - current_stats$Four_week_trend <- NA_real_ - current_stats$CV_Trend_Long_Term <- NA_real_ - current_stats$nmr_of_weeks_analysed <- 1L - - if (is.null(prev_stats) || nrow(prev_stats) == 0) { - message(" No previous week data available - using defaults") - return(current_stats) - } - - message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) - - prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) - - prev_field_analysis <- NULL - - tryCatch({ - analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") - if (dir.exists(analysis_dir)) { - analysis_files <- 
list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) - if (length(analysis_files) > 0) { - recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] - prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, - col_select = c(Field_id, nmr_of_weeks_analysed, Phase)) - } - } - }, error = function(e) { - message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) - }) - - if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed")) - } - - historical_4weeks <- list() - historical_8weeks <- list() - - if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { - message(" Loading historical field_stats for 4-week and 8-week trends...") - - for (lookback in 1:4) { - target_week <- current_week - lookback - target_year <- year - if (target_week < 1) { - target_week <- target_week + 52 - target_year <- target_year - 1 - } - - rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - tryCatch({ - stats_data <- readRDS(rds_path) - historical_4weeks[[length(historical_4weeks) + 1]] <- list( - week = target_week, - stats = stats_data - ) - }, error = function(e) { - message(paste(" Warning: Could not load week", target_week, ":", e$message)) - }) - } - } - - for (lookback in 1:8) { - target_week <- current_week - lookback - target_year <- year - if (target_week < 1) { - target_week <- target_week + 52 - target_year <- target_year - 1 - } - - rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - tryCatch({ - stats_data <- readRDS(rds_path) - 
historical_8weeks[[length(historical_8weeks) + 1]] <- list( - week = target_week, - stats = stats_data - ) - }, error = function(e) { - # Silently skip - }) - } - } - - if (length(historical_4weeks) > 0) { - message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) - } - if (length(historical_8weeks) > 0) { - message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) - } - } - - cv_trends_calculated <- 0 - four_week_trends_calculated <- 0 - cv_long_term_calculated <- 0 - - for (i in seq_len(nrow(current_stats))) { - field_id <- current_stats$Field_id[i] - prev_idx <- prev_lookup[field_id] - - if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { - prev_row <- prev_stats[prev_idx, , drop = FALSE] - - prev_ci <- prev_row$Mean_CI[1] - if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { - current_stats$Weekly_ci_change[i] <- - round(current_stats$Mean_CI[i] - prev_ci, 2) - } - - prev_cv <- prev_row$CV[1] - if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { - current_stats$CV_Trend_Short_Term[i] <- - calculate_cv_trend(current_stats$CV[i], prev_cv) - cv_trends_calculated <- cv_trends_calculated + 1 - } - - if (length(historical_4weeks) > 0) { - ci_values_4week <- numeric() - - for (hist_idx in rev(seq_along(historical_4weeks))) { - hist_data <- historical_4weeks[[hist_idx]]$stats - hist_field <- which(hist_data$Field_id == field_id) - if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) { - ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) - } - } - - ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) - - if (length(ci_values_4week) >= 2) { - current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) - four_week_trends_calculated <- four_week_trends_calculated + 1 - } - } - - if (length(historical_8weeks) > 0) { - cv_values_8week <- numeric() - - for (hist_idx in rev(seq_along(historical_8weeks))) { - hist_data <- 
historical_8weeks[[hist_idx]]$stats - hist_field <- which(hist_data$Field_id == field_id) - if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { - cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) - } - } - - cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) - - if (length(cv_values_8week) >= 2) { - slope <- calculate_cv_trend_long_term(cv_values_8week) - current_stats$CV_Trend_Long_Term[i] <- slope - cv_long_term_calculated <- cv_long_term_calculated + 1 - } - } - - if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - prev_analysis_row <- prev_field_analysis %>% - dplyr::filter(Field_id == field_id) - - if (nrow(prev_analysis_row) > 0) { - prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1] - - # Only increment nmr_of_weeks_analysed if we have previous data - if (!is.na(prev_nmr_weeks_analysis)) { - current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L - } else { - current_stats$nmr_of_weeks_analysed[i] <- 1L - } - } - } - } - } - - message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) - message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) - message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) - return(current_stats) -} - -# ============================================================================ -# LOAD OR CALCULATE WEEKLY STATISTICS -# ============================================================================ - -load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, - mosaic_dir, reports_dir, report_date = Sys.Date()) { - - rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - message(paste("Loading cached statistics from:", basename(rds_path))) - return(readRDS(rds_path)) - } - - 
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) - stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, - mosaic_dir, report_date) - - output_dir <- file.path(reports_dir, "kpis", "field_stats") - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - } - - saveRDS(stats_df, rds_path) - message(paste("Saved weekly statistics RDS:", basename(rds_path))) - - csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year) - csv_path <- file.path(output_dir, csv_filename) - readr::write_csv(stats_df, csv_path) - message(paste("Saved weekly statistics CSV:", basename(csv_path))) - - return(stats_df) -} - -load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { - historical_data <- list() - loaded_weeks <- c() - missing_weeks <- c() - - for (lookback in 0:(num_weeks - 1)) { - target_week <- current_week - lookback - if (target_week < 1) target_week <- target_week + 52 - - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") - csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) - - if (file.exists(csv_path)) { - tryCatch({ - data <- read_csv(csv_path, show_col_types = FALSE) - historical_data[[lookback + 1]] <- list( - week = target_week, - data = data - ) - loaded_weeks <- c(loaded_weeks, target_week) - }, error = function(e) { - message(paste(" Warning: Could not load week", target_week, ":", e$message)) - missing_weeks <<- c(missing_weeks, target_week) - }) - } else { - missing_weeks <- c(missing_weeks, target_week) - } - } - - if (length(missing_weeks) > 0 && auto_generate) { - message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", "))) - message("Scanning for ALL available weekly mosaics and calculating stats...\n") - - if (is.null(field_boundaries_sf)) { - message(" 
Error: field_boundaries_sf not provided - cannot auto-generate") - return(historical_data) - } - - if (!exists("weekly_tile_max")) { - message(" ✗ weekly_tile_max path not defined") - return(historical_data) - } - - check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max) - mosaic_scan_dir <- NA - - for (check_path in check_paths) { - if (dir.exists(check_path)) { - tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE) - if (length(tif_files) > 0) { - mosaic_scan_dir <- check_path - break - } - } - } - - if (is.na(mosaic_scan_dir)) { - message(" ✗ No mosaic files found in weekly_tile_max") - return(historical_data) - } - - weeks_to_load <- 8 - today <- Sys.Date() - target_dates <- today - (0:(weeks_to_load - 1)) * 7 - - expected_weeks <- data.frame( - date = target_dates, - week = as.numeric(format(target_dates, "%V")), - year = as.numeric(format(target_dates, "%Y")), - stringsAsFactors = FALSE - ) - expected_weeks <- unique(expected_weeks) - - message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):")) - for (i in seq_len(nrow(expected_weeks))) { - message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i])) - } - message("") - - tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$", - full.names = FALSE) - - available_weeks <- data.frame() - for (filename in tif_files) { - matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]] - if (length(matches) > 0) { - week_year <- strsplit(matches[1], "_")[[1]] - if (length(week_year) == 3) { - week_num <- as.numeric(week_year[2]) - year_num <- as.numeric(week_year[3]) - - if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) { - available_weeks <- rbind(available_weeks, - data.frame(week = week_num, year = year_num)) - } - } - } - } - - available_weeks <- unique(available_weeks) - available_weeks <- merge(available_weeks, expected_weeks[, 
c("week", "year", "date")], by = c("week", "year")) - available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ] - - if (nrow(available_weeks) == 0) { - message(" ✗ No matching mosaic files found") - message(paste(" Scanned directory:", mosaic_scan_dir)) - return(historical_data) - } - - message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:")) - - for (i in seq_len(nrow(available_weeks))) { - week_to_calc <- available_weeks$week[i] - year_to_calc <- available_weeks$year[i] - date_to_calc <- available_weeks$date[i] - - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc) - tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found")) - next - } - - message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics")) - - tryCatch({ - week_stats <- load_or_calculate_weekly_stats( - week_num = week_to_calc, - year = year_to_calc, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - mosaic_dir = mosaic_scan_dir, - reports_dir = reports_dir, - report_date = date_to_calc - ) - - if (!is.null(week_stats) && nrow(week_stats) > 0) { - message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields")) - - historical_data[[length(historical_data) + 1]] <- list( - week = week_to_calc, - year = year_to_calc, - data = week_stats - ) - loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc)) - } - }, error = function(e) { - message(paste(" ✗ Error:", e$message)) - }) - } - } - - if (length(historical_data) == 0) { - message(paste("Error: No historical field data found and could not auto-generate weeks")) - return(NULL) - } - - message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", - paste(loaded_weeks, collapse = ", "))) - - 
return(historical_data) -} - -# ============================================================================ -# HELPER: Extract field-level statistics from CI raster +# FIELD STATISTICS EXTRACTION # ============================================================================ +#' Extract CI statistics for all fields from a single CI raster band extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { - #' Extract CI statistics for all fields from a single CI raster band - extract_result <- terra::extract(ci_band, field_boundaries_sf) stats_list <- list() @@ -929,13 +499,742 @@ extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { } # ============================================================================ -# COMMENTED OUT / UNUSED FUNCTIONS (kept for future use) +# EXPORT FUNCTIONS (USED BY ALL CLIENTS) # ============================================================================ -# analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, -# mosaic_dir, historical_data = NULL, planting_dates = NULL, -# report_date = Sys.Date(), harvest_imminence_data = NULL, -# harvesting_data = NULL) { -# # [Function kept as reference for parallel field analysis] -# # Currently replaced by calculate_field_statistics() for efficiency -# } +#' Generate summary statistics from field analysis data +generate_field_analysis_summary <- function(field_df) { + message("Generating summary statistics...") + + total_acreage <- sum(field_df$Acreage, na.rm = TRUE) + + germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE) + tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE) + grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE) + maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE) + unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm 
= TRUE) + + harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) + stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) + recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) + growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) + germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) + germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) + no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) + + clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) + partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) + no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) + total_fields <- nrow(field_df) + + clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) + partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) + no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) + + summary_df <- data.frame( + Category = c( + "--- PHASE DISTRIBUTION ---", + "Germination", + "Tillering", + "Grand Growth", + "Maturation", + "Unknown phase", + "--- STATUS TRIGGERS ---", + "Harvest ready", + "Stress detected", + "Strong recovery", + "Growth on track", + "Germination complete", + "Germination started", + "No trigger", + "--- CLOUD COVERAGE (FIELDS) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- CLOUD COVERAGE (ACREAGE) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- TOTAL ---", + "Total Acreage" + ), + Acreage = c( + NA, + 
round(germination_acreage, 2), + round(tillering_acreage, 2), + round(grand_growth_acreage, 2), + round(maturation_acreage, 2), + round(unknown_phase_acreage, 2), + NA, + round(harvest_ready_acreage, 2), + round(stress_acreage, 2), + round(recovery_acreage, 2), + round(growth_on_track_acreage, 2), + round(germination_complete_acreage, 2), + round(germination_started_acreage, 2), + round(no_trigger_acreage, 2), + NA, + paste0(clear_fields, " fields"), + paste0(partial_fields, " fields"), + paste0(no_image_fields, " fields"), + NA, + round(clear_acreage, 2), + round(partial_acreage, 2), + round(no_image_acreage, 2), + NA, + round(total_acreage, 2) + ), + stringsAsFactors = FALSE + ) + + return(summary_df) +} + +#' Export field analysis to Excel, CSV, and RDS formats +export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) { + message("Exporting per-field analysis to Excel, CSV, and RDS...") + + field_df_rounded <- field_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + + # Handle NULL summary_df + summary_df_rounded <- if (!is.null(summary_df)) { + summary_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + } else { + NULL + } + + output_subdir <- file.path(reports_dir, "field_analysis") + if (!dir.exists(output_subdir)) { + dir.create(output_subdir, recursive = TRUE) + } + + excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") + excel_path <- file.path(output_subdir, excel_filename) + excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) + + # Build sheets list dynamically + sheets <- list( + "Field Data" = field_df_rounded + ) + if (!is.null(summary_df_rounded)) { + sheets[["Summary"]] <- summary_df_rounded + } + + write_xlsx(sheets, excel_path) + message(paste("✓ Field analysis Excel exported to:", excel_path)) + + kpi_data <- list( + field_analysis = field_df_rounded, + field_analysis_summary = summary_df_rounded, + 
metadata = list( + current_week = current_week, + year = year, + project = project_dir, + created_at = Sys.time() + ) + ) + + rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") + rds_path <- file.path(reports_dir, rds_filename) + + saveRDS(kpi_data, rds_path) + message(paste("✓ Field analysis RDS exported to:", rds_path)) + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") + csv_path <- file.path(output_subdir, csv_filename) + write_csv(field_df_rounded, csv_path) + message(paste("✓ Field analysis CSV exported to:", csv_path)) + + return(list(excel = excel_path, rds = rds_path, csv = csv_path)) +} + +# ============================================================================ +# ADDITIONAL CRITICAL FUNCTIONS FROM 80_weekly_stats_utils.R (REQUIRED BY 80_calculate_kpis.R) +# ============================================================================ + +#' Calculate statistics for all fields from weekly mosaics +calculate_field_statistics <- function(field_boundaries_sf, week_num, year, + mosaic_dir, report_date = Sys.Date()) { + + message(paste("Calculating statistics for all fields - Week", week_num, year)) + + # Per-field mode: look in per-field subdirectories + single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year) + + # Find all field subdirectories with mosaics for this week + field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + per_field_files <- list() + for (field in field_dirs) { + field_mosaic_dir <- file.path(mosaic_dir, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files[[field]] <- files[1] # Take first match for this field + } + } + + if (length(per_field_files) == 0) { + stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir)) 
+ } + + message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num)) + results_list <- list() + + # Initialize progress bar + pb <- progress::progress_bar$new( + format = " [:bar] :percent | Field :current/:total", + total = length(per_field_files), + width = 60 + ) + + # Process each field's mosaic + for (field_idx in seq_along(per_field_files)) { + pb$tick() # Update progress bar + field_name <- names(per_field_files)[field_idx] + field_file <- per_field_files[[field_name]] + + tryCatch({ + current_rast <- terra::rast(field_file) + ci_band <- current_rast[["CI"]] + + if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { + message(paste(" [SKIP] Field", field_name, "- CI band not found")) + next + } + + # Extract CI values for this field + field_boundary <- field_boundaries_sf[field_boundaries_sf$field == field_name, ] + + if (nrow(field_boundary) == 0) { + message(paste(" [SKIP] Field", field_name, "- not in field boundaries")) + next + } + + extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE) + + if (nrow(extracted) == 0 || all(is.na(extracted$CI))) { + message(paste(" [SKIP] Field", field_name, "- no CI values found")) + next + } + + ci_vals <- extracted$CI[!is.na(extracted$CI)] + + if (length(ci_vals) == 0) { + next + } + + # Calculate statistics + mean_ci <- mean(ci_vals, na.rm = TRUE) + ci_std <- sd(ci_vals, na.rm = TRUE) + cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ + range_min <- min(ci_vals, na.rm = TRUE) + range_max <- max(ci_vals, na.rm = TRUE) + range_str <- sprintf("%.1f-%.1f", range_min, range_max) + ci_percentiles_str <- get_ci_percentiles(ci_vals) + + num_pixels_total <- length(ci_vals) + num_pixels_gte_2 <- sum(ci_vals >= 2) + pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 + + num_total <- nrow(extracted) + num_data <- sum(!is.na(extracted$CI)) + pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 
0 + cloud_cat <- if (num_data == 0) "No image available" + else if (pct_clear >= 95) "Clear view" + else "Partial coverage" + + # Add to results + results_list[[length(results_list) + 1]] <- data.frame( + Field_id = field_name, + Mean_CI = round(mean_ci, 2), + CV = round(cv * 100, 2), + CI_range = range_str, + CI_Percentiles = ci_percentiles_str, + Pct_pixels_CI_gte_2 = pct_pixels_gte_2, + Cloud_pct_clear = pct_clear, + Cloud_category = cloud_cat, + stringsAsFactors = FALSE + ) + + }, error = function(e) { + message(paste(" [ERROR] Field", field_name, ":", e$message)) + }) + } + + if (length(results_list) == 0) { + stop(paste("No fields processed successfully for week", week_num)) + } + + stats_df <- dplyr::bind_rows(results_list) + message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) + + return(stats_df) +} + +#' Load or calculate weekly statistics (with RDS caching) +load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, + mosaic_dir, reports_dir, report_date = Sys.Date()) { + + rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) + rds_path <- file.path(reports_dir, "field_stats", rds_filename) + + if (file.exists(rds_path)) { + message(paste("Loading cached statistics from:", basename(rds_path))) + return(readRDS(rds_path)) + } + + message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) + stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date) + + output_dir <- file.path(reports_dir, "field_stats") + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + } + + saveRDS(stats_df, rds_path) + message(paste("Saved weekly statistics RDS:", basename(rds_path))) + + csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year) + csv_path <- file.path(output_dir, csv_filename) + readr::write_csv(stats_df, csv_path) + 
message(paste("Saved weekly statistics CSV:", basename(csv_path))) + + return(stats_df) +} + +#' Load historical field data from CSV (4-week lookback) +load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL, daily_vals_dir = NULL) { + + historical_data <- list() + loaded_weeks <- c() + missing_weeks <- c() + + for (lookback in 0:(num_weeks - 1)) { + target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) + target_week <- target$week + target_year <- target$year + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") + csv_path <- file.path(reports_dir, "field_analysis", csv_filename) + + if (file.exists(csv_path)) { + tryCatch({ + data <- readr::read_csv(csv_path, show_col_types = FALSE) + historical_data[[lookback + 1]] <- list( + week = target_week, + year = target_year, + data = data + ) + loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message)) + missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) + }) + } else { + missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) + } + } + + if (length(historical_data) == 0) { + message(paste("Error: No historical field data found")) + return(NULL) + } + + message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", + paste(loaded_weeks, collapse = ", "))) + + return(historical_data) +} + +#' Calculate KPI trends (CI change, CV trends, 4-week and 8-week trends) +calculate_kpi_trends <- function(current_stats, prev_stats = NULL, + project_dir = NULL, reports_dir = NULL, + current_week = NULL, year = NULL) { + + message("Calculating KPI trends from current and 
previous week data") + + current_stats$Weekly_ci_change <- NA_real_ + current_stats$CV_Trend_Short_Term <- NA_real_ + current_stats$Four_week_trend <- NA_real_ + current_stats$CV_Trend_Long_Term <- NA_real_ + current_stats$nmr_of_weeks_analysed <- 1L + + if (is.null(prev_stats) || nrow(prev_stats) == 0) { + message(" No previous week data available - using defaults") + return(current_stats) + } + + message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) + + prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) + prev_field_analysis <- NULL + + tryCatch({ + analysis_dir <- file.path(reports_dir, "field_analysis") + if (dir.exists(analysis_dir)) { + analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) + if (length(analysis_files) > 0) { + recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] + prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, + col_select = c(Field_id, nmr_of_weeks_analysed, Phase)) + } + } + }, error = function(e) { + message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) + }) + + if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { + message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed")) + } + + historical_4weeks <- list() + historical_8weeks <- list() + + if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { + message(" Loading historical field_stats for 4-week and 8-week trends...") + + for (lookback in 1:4) { + target_week <- current_week - lookback + target_year <- year + if (target_week < 1) { + target_week <- target_week + 52 + target_year <- target_year - 1 + } + + rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) + rds_path <- file.path(reports_dir, "field_stats", rds_filename) + + if (file.exists(rds_path)) { + tryCatch({ + 
stats_data <- readRDS(rds_path) + historical_4weeks[[length(historical_4weeks) + 1]] <- list(week = target_week, stats = stats_data) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, ":", e$message)) + }) + } + } + + for (lookback in 1:8) { + target_week <- current_week - lookback + target_year <- year + if (target_week < 1) { + target_week <- target_week + 52 + target_year <- target_year - 1 + } + + rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) + rds_path <- file.path(reports_dir, "field_stats", rds_filename) + + if (file.exists(rds_path)) { + tryCatch({ + stats_data <- readRDS(rds_path) + historical_8weeks[[length(historical_8weeks) + 1]] <- list(week = target_week, stats = stats_data) + }, error = function(e) { + # Silently skip + }) + } + } + + if (length(historical_4weeks) > 0) { + message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) + } + if (length(historical_8weeks) > 0) { + message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) + } + } + + cv_trends_calculated <- 0 + four_week_trends_calculated <- 0 + cv_long_term_calculated <- 0 + + for (i in seq_len(nrow(current_stats))) { + field_id <- current_stats$Field_id[i] + prev_idx <- prev_lookup[field_id] + + if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { + prev_row <- prev_stats[prev_idx, , drop = FALSE] + + prev_ci <- prev_row$Mean_CI[1] + if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { + current_stats$Weekly_ci_change[i] <- round(current_stats$Mean_CI[i] - prev_ci, 2) + } + + prev_cv <- prev_row$CV[1] + if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { + current_stats$CV_Trend_Short_Term[i] <- calculate_cv_trend(current_stats$CV[i], prev_cv) + cv_trends_calculated <- cv_trends_calculated + 1 + } + + if (length(historical_4weeks) > 0) { + ci_values_4week <- numeric() + for (hist_idx in rev(seq_along(historical_4weeks))) { + 
hist_data <- historical_4weeks[[hist_idx]]$stats + hist_field <- which(hist_data$Field_id == field_id) + if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) { + ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) + } + } + ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) + + if (length(ci_values_4week) >= 2) { + current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) + four_week_trends_calculated <- four_week_trends_calculated + 1 + } + } + + if (length(historical_8weeks) > 0) { + cv_values_8week <- numeric() + for (hist_idx in rev(seq_along(historical_8weeks))) { + hist_data <- historical_8weeks[[hist_idx]]$stats + hist_field <- which(hist_data$Field_id == field_id) + if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { + cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) + } + } + cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) + + if (length(cv_values_8week) >= 2) { + slope <- calculate_cv_trend_long_term(cv_values_8week) + current_stats$CV_Trend_Long_Term[i] <- slope + cv_long_term_calculated <- cv_long_term_calculated + 1 + } + } + + if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { + prev_analysis_row <- prev_field_analysis %>% dplyr::filter(Field_id == field_id) + + if (nrow(prev_analysis_row) > 0) { + prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1] + if (!is.na(prev_nmr_weeks_analysis)) { + current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L + } else { + current_stats$nmr_of_weeks_analysed[i] <- 1L + } + } + } + } + } + + message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) + message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) + message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) + return(current_stats) +} + +# 
============================================================================ +# INTERNAL HELPER FUNCTIONS (from 80_kpi_utils.R) - DO NOT DELETE +# ============================================================================ + +# Spatial autocorrelation thresholds for field pattern analysis +MORAN_THRESHOLD_HIGH <- 0.95 # Very strong clustering (problematic patterns) +MORAN_THRESHOLD_MODERATE <- 0.85 # Moderate clustering +MORAN_THRESHOLD_LOW <- 0.7 # Normal field continuity + +#' Calculate coefficient of variation for uniformity assessment +calculate_cv <- function(values) { + values <- values[!is.na(values) & is.finite(values)] + if (length(values) < 2) return(NA) + cv <- sd(values) / mean(values) + return(cv) +} + +#' Calculate percentage of field with positive vs negative change +calculate_change_percentages <- function(current_values, previous_values) { + if (length(current_values) != length(previous_values)) { + return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) + } + + change_values <- current_values - previous_values + valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)] + + if (length(valid_changes) < 2) { + return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) + } + + positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100 + negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100 + stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100 + + return(list( + positive_pct = positive_pct, + negative_pct = negative_pct, + stable_pct = stable_pct + )) +} + +#' Calculate spatial autocorrelation (Moran's I) for a field +calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) { + tryCatch({ + field_raster <- terra::crop(ci_raster, field_boundary) + field_raster <- terra::mask(field_raster, field_boundary) + raster_points <- terra::as.points(field_raster, na.rm = TRUE) + + if (length(raster_points) < 10) { + return(list(morans_i = NA, p_value = NA, 
interpretation = "insufficient_data")) + } + + points_sf <- sf::st_as_sf(raster_points) + coords <- sf::st_coordinates(points_sf) + k_neighbors <- min(8, max(4, floor(nrow(coords) / 10))) + + knn_nb <- spdep::knearneigh(coords, k = k_neighbors) + knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE) + + ci_values <- points_sf[[1]] + moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE) + + morans_i <- moran_result$estimate[1] + p_value <- moran_result$p.value + + interpretation <- if (is.na(morans_i)) { + "insufficient_data" + } else if (p_value > 0.05) { + "random" + } else if (morans_i > MORAN_THRESHOLD_HIGH) { + "very_strong_clustering" + } else if (morans_i > MORAN_THRESHOLD_MODERATE) { + "strong_clustering" + } else if (morans_i > MORAN_THRESHOLD_LOW) { + "normal_continuity" + } else if (morans_i > 0.3) { + "weak_clustering" + } else if (morans_i < -0.3) { + "dispersed" + } else { + "low_autocorrelation" + } + + return(list(morans_i = morans_i, p_value = p_value, interpretation = interpretation)) + }, error = function(e) { + warning(paste("Error calculating spatial autocorrelation:", e$message)) + return(list(morans_i = NA, p_value = NA, interpretation = "error")) + }) +} + +#' Extract CI band from multi-band raster +extract_ci_values <- function(ci_raster, field_vect) { + extracted <- terra::extract(ci_raster, field_vect, fun = NULL) + + if ("CI" %in% names(extracted)) { + return(extracted[, "CI"]) + } else if (ncol(extracted) > 1) { + return(extracted[, ncol(extracted)]) + } else { + return(extracted[, 1]) + } +} + +#' Calculate current and previous week numbers using ISO 8601 +calculate_week_numbers <- function(report_date = Sys.Date()) { + current_week <- as.numeric(format(report_date, "%V")) + current_year <- as.numeric(format(report_date, "%G")) + + previous_week <- current_week - 1 + previous_year <- current_year + + if (previous_week < 1) { + previous_week <- 52 + previous_year <- current_year - 1 + 
} + + return(list( + current_week = current_week, + previous_week = previous_week, + year = current_year, + previous_year = previous_year + )) +} + +#' Load field CI raster (handles single-file and per-field architectures) +load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) { + is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field") + + if (is_per_field) { + per_field_dir <- attr(ci_raster_or_obj, "per_field_dir") + week_file <- attr(ci_raster_or_obj, "week_file") + field_mosaic_path <- file.path(per_field_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_mosaic <- terra::rast(field_mosaic_path) + if (terra::nlyr(field_mosaic) >= 5) { + return(field_mosaic[[5]]) + } else { + return(field_mosaic[[1]]) + } + }, error = function(e) { + return(NULL) + }) + } else { + return(NULL) + } + } else { + if (!is.null(field_vect)) { + return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE)) + } else { + return(ci_raster_or_obj) + } + } +} + +#' Load weekly CI mosaic (single-file or per-field) +load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { + week_file <- sprintf("week_%02d_%d.tif", week_num, year) + week_path <- file.path(mosaic_dir, week_file) + + if (file.exists(week_path)) { + tryCatch({ + mosaic_raster <- terra::rast(week_path) + ci_raster <- mosaic_raster[[5]] + names(ci_raster) <- "CI" + return(ci_raster) + }, error = function(e) { + return(NULL) + }) + } + + if (dir.exists(mosaic_dir)) { + field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + found_any <- FALSE + for (field in field_dirs) { + field_mosaic_path <- file.path(mosaic_dir, field, week_file) + if (file.exists(field_mosaic_path)) { + found_any <- TRUE + break + } + } + + if (found_any) { + dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA) + attr(dummy_raster, "per_field_dir") <- mosaic_dir + 
attr(dummy_raster, "week_file") <- week_file + attr(dummy_raster, "is_per_field") <- TRUE + return(dummy_raster) + } + } + + return(NULL) +} + +#' Prepare predictions with consistent naming +prepare_predictions <- function(predictions, newdata) { + return(predictions %>% + as.data.frame() %>% + dplyr::rename(predicted_Tcha = ".") %>% + dplyr::mutate( + sub_field = newdata$sub_field, + field = newdata$field, + Age_days = newdata$DOY, + total_CI = round(newdata$cumulative_CI, 0), + predicted_Tcha = round(predicted_Tcha, 0), + season = newdata$season + ) %>% + dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>% + dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) + ) +} diff --git a/r_app/90_CI_report_with_kpis_simple.Rmd b/r_app/90_CI_report_with_kpis_simple.Rmd index 18a2829..365c0a7 100644 --- a/r_app/90_CI_report_with_kpis_simple.Rmd +++ b/r_app/90_CI_report_with_kpis_simple.Rmd @@ -3,7 +3,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: CI_report.docx report_date: "2026-01-22" - data_dir: "aura" + data_dir: "angata" mail_day: "Wednesday" borders: FALSE ci_plot_type: "both" # options: "absolute", "cumulative", "both" @@ -38,20 +38,26 @@ reporting_script <- TRUE # Load all packages at once with suppressPackageStartupMessages suppressPackageStartupMessages({ - library(here) - library(sf) - library(terra) - library(tidyverse) - library(tmap) - library(lubridate) - library(zoo) - library(rsample) - library(caret) - library(randomForest) - library(CAST) - library(knitr) - library(tidyr) - library(flextable) + # File path handling + library(here) # For relative path resolution (platform-independent file paths) + + # Spatial data handling + library(sf) # For reading/manipulating field boundaries (GeoJSON) + library(terra) # For raster operations (reading mosaic TIFFs for visualization) + + # Data manipulation + library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization) + library(tidyr) # For data 
reshaping (pivot_longer, pivot_wider for wide-to-long conversion) + library(lubridate) # For date/time operations (week extraction, date formatting) + library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling) + + # Visualization + library(tmap) # For interactive maps (field boundary visualization) + + # Reporting + library(knitr) # For R Markdown document generation (code execution and output) + library(flextable) # For formatted tables in Word output (professional table styling) + library(officer) # For Word document manipulation (custom formatting, headers, footers) }) # Load custom utility functions @@ -101,10 +107,14 @@ project_dir <- params$data_dir # Source project parameters with error handling tryCatch({ source(here::here("r_app", "parameters_project.R")) + source(here::here("r_app", "00_common_utils.R")) }, error = function(e) { - stop("Error loading parameters_project.R: ", e$message) + stop("Error loading project utilities: ", e$message) }) +# Load centralized paths +paths <- setup_project_directories(project_dir) + # Log initial configuration safe_log("Starting the R Markdown script with KPIs") safe_log(paste("mail_day params:", params$mail_day)) @@ -114,15 +124,14 @@ safe_log(paste("mail_day variable:", mail_day)) ```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE} ## SIMPLE KPI LOADING - robust lookup with fallbacks -# Primary expected directory inside the laravel storage -kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis") +# Primary expected directory from centralized paths +kpi_data_dir <- paths$kpi_reports_dir date_suffix <- format(as.Date(report_date), "%Y%m%d") # Calculate current week from report_date using ISO 8601 week numbering -report_date_obj <- as.Date(report_date) -current_week <- lubridate::isoweek(report_date_obj) -current_iso_year <- lubridate::isoyear(report_date_obj) -week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year) 
+current_week <- as.numeric(format(as.Date(report_date), "%V")) +current_year <- as.numeric(format(as.Date(report_date), "%G")) +week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) # Candidate filenames we expect (exact and common variants) expected_summary_names <- c( @@ -346,50 +355,17 @@ safe_log(paste("Week range:", week_start, "to", week_end)) ``` ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} -# Load CI index data with error handling +# Load CI quadrant data for field-level analysis tryCatch({ - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) - + CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) safe_log("Successfully loaded CI quadrant data") }, error = function(e) { stop("Error loading CI quadrant data: ", e$message) }) -# Get file paths for different weeks using the utility function -tryCatch({ - path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0) - path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1) - path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2) - path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3) - - # Log the calculated paths - safe_log("Required mosaic paths:") - safe_log(paste("Path to current week:", path_to_week_current)) - safe_log(paste("Path to week minus 1:", path_to_week_minus_1)) - safe_log(paste("Path to week minus 2:", path_to_week_minus_2)) - safe_log(paste("Path to week minus 3:", path_to_week_minus_3)) - - # Validate that files exist - if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current) - if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1) - if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2) - if 
(!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3) - - # Load raster data with terra functions - CI <- terra::rast(path_to_week_current)$CI - CI_m1 <- terra::rast(path_to_week_minus_1)$CI - CI_m2 <- terra::rast(path_to_week_minus_2)$CI - CI_m3 <- terra::rast(path_to_week_minus_3)$CI - - # DEBUG: Check which weeks were actually loaded and their data ranges - safe_log(paste("DEBUG - CI (current) range:", paste(terra::minmax(CI)[,1], collapse=" to "))) - safe_log(paste("DEBUG - CI_m1 (week-1) range:", paste(terra::minmax(CI_m1)[,1], collapse=" to "))) - safe_log(paste("DEBUG - CI_m2 (week-2) range:", paste(terra::minmax(CI_m2)[,1], collapse=" to "))) - safe_log(paste("DEBUG - CI_m3 (week-3) range:", paste(terra::minmax(CI_m3)[,1], collapse=" to "))) - -}, error = function(e) { - stop("Error loading raster data: ", e$message) -}) +# NOTE: Overview maps skipped for this report +# Individual field sections load their own per-field mosaics directly +``` ``` ```{r compute_benchmarks_once, include=FALSE} @@ -450,7 +426,7 @@ if (exists("summary_tables") && !is.null(summary_tables)) { ## Executive Summary - Key Performance Indicators -```{r combined_kpi_table, echo=FALSE} +```{r combined_kpi_table, echo=FALSE, results='asis'} # Combine all KPI tables into a single table with standardized column names display_names <- c( field_uniformity_summary = "Field Uniformity", @@ -504,7 +480,7 @@ ft ## Field Alerts -```{r field_alerts_table, echo=FALSE} +```{r field_alerts_table, echo=FALSE, results='asis'} # Generate alerts for all fields generate_field_alerts <- function(field_details_table) { if (is.null(field_details_table) || nrow(field_details_table) == 0) { @@ -610,42 +586,13 @@ if (exists("field_details_table") && !is.null(field_details_table)) { } ``` - -```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE} -# Calculate difference rasters for comparisons -# When one week has NA values, the 
difference will also be NA (not zero) -# Initialize placeholders first to ensure they exist -last_week_dif_raster_abs <- NULL -three_week_dif_raster_abs <- NULL - -tryCatch({ - # Always calculate differences - NA values will propagate naturally - # This way empty weeks (all NA) result in NA differences, not misleading zeros - last_week_dif_raster_abs <- (CI - CI_m1) - three_week_dif_raster_abs <- (CI - CI_m3) - - safe_log("Calculated difference rasters (NA values preserved)") - -}, error = function(e) { - safe_log(paste("Error calculating difference rasters:", e$message), "ERROR") - # Fallback: create NA placeholders if calculation fails - if (is.null(last_week_dif_raster_abs)) { - last_week_dif_raster_abs <- CI * NA - } - if (is.null(three_week_dif_raster_abs)) { - three_week_dif_raster_abs <- CI * NA - } -}) - -# Final safety check - ensure variables exist in global environment -if (is.null(last_week_dif_raster_abs)) { - last_week_dif_raster_abs <- CI * NA - safe_log("Created NA placeholder for last_week_dif_raster_abs", "WARNING") -} -if (is.null(three_week_dif_raster_abs)) { - three_week_dif_raster_abs <- CI * NA - safe_log("Created NA placeholder for three_week_dif_raster_abs", "WARNING") +```{r data, message=TRUE, warning=TRUE, include=FALSE} +# Verify CI quadrant data is loaded from load_ci_data chunk +if (!exists("CI_quadrant") || is.null(CI_quadrant)) { + stop("CI_quadrant data not available - check load_ci_data chunk") } +safe_log("CI quadrant data verified for field-level analysis") +``` ``` ```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE} @@ -665,76 +612,6 @@ tryCatch({ stop("Error loading field boundaries: ", e$message) }) ``` -\newpage - -## Chlorophyll Index (CI) Overview Map - Current Week -```{r render_ci_overview_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE} -# Create overview chlorophyll index map -tryCatch({ - # Choose palette based on colorblind_friendly parameter - 
ci_palette <- if (colorblind_friendly) "viridis" else "brewer.rd_yl_gn" - - # Base shape - map <- tmap::tm_shape(CI, unit = "m") - - # Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison) - map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = ci_palette, - limits = c(1, 8)), - col.legend = tm_legend(title = "Chlorophyll Index (CI)", - orientation = "landscape", - position = tm_pos_out("center", "bottom"))) - # Complete the map with layout and other elements - map <- map + - tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") + - tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") + - tmap::tm_shape(AllPivots0) + - tmap::tm_borders(col = "black") + - tmap::tm_text("sub_field", size = 0.6, col = "black") - - # Print the map - print(map) -}, error = function(e) { - safe_log(paste("Error creating CI overview map:", e$message), "ERROR") - plot(1, type="n", axes=FALSE, xlab="", ylab="") - text(1, 1, "Error creating CI overview map", cex=1.5) -}) - -``` -\newpage - -## Weekly Chlorophyll Index Difference Map -```{r render_ci_difference_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE} -# Create chlorophyll index difference map -tryCatch({ - # Choose palette based on colorblind_friendly parameter - diff_palette <- if (colorblind_friendly) "plasma" else "brewer.rd_yl_gn" - - # Base shape - map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") - - # Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale) - map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = diff_palette, - midpoint = 0, - limits = c(-3, 3)), - col.legend = tm_legend(title = "Chlorophyll Index (CI) Change", - orientation = "landscape", - position = tm_pos_out("center", "bottom"))) - # Complete the map with layout and other elements - map <- map + - tmap::tm_scalebar(position = tm_pos_out("right", 
"bottom"), text.color = "black") + - tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") + - tmap::tm_shape(AllPivots0) + - tmap::tm_borders(col = "black") + - tmap::tm_text("sub_field", size = 0.6, col = "black") - - # Print the map - print(map) -}, error = function(e) { - safe_log(paste("Error creating CI difference map:", e$message), "ERROR") - plot(1, type="n", axes=FALSE, xlab="", ylab="") - text(1, 1, "Error creating CI difference map", cex=1.5) -}) -``` # Section 2: Field-by-Field Analysis @@ -760,6 +637,23 @@ tryCatch({ dplyr::group_by(field) %>% dplyr::summarise(.groups = 'drop') + # Use per-field weekly mosaic directory path from parameters_project.R + weekly_mosaic_per_field_dir <- weekly_CI_mosaic + + # Helper to get week/year from a date + get_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) + ) + } + + # Get week/year for current and historical weeks (local to field section) + current_ww <- get_week_year(as.Date(today)) + minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1)) + minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2)) + minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3)) + # Generate plots for each field for(i in seq_along(AllPivots_merged$field)) { field_name <- AllPivots_merged$field[i] @@ -775,15 +669,71 @@ tryCatch({ cat("\\newpage\n\n") } - # Call ci_plot with explicit parameters (ci_plot will generate its own header) + # Load per-field mosaics directly for this field + field_CI <- NULL + field_CI_m1 <- NULL + field_CI_m2 <- NULL + field_CI_m3 <- NULL + + tryCatch({ + # Load per-field mosaic for current week + per_field_path_current <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year + ) + if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) { + field_CI <- terra::rast(per_field_path_current)[["CI"]] + } + + # Load 
per-field mosaic for week-1 + per_field_path_m1 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year + ) + if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) { + field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]] + } + + # Load per-field mosaic for week-2 + per_field_path_m2 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year + ) + if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) { + field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]] + } + + # Load per-field mosaic for week-3 + per_field_path_m3 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year + ) + if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) { + field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]] + } + + safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG") + + }, error = function(e) { + safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING") + }) + + # Calculate difference rasters from per-field data (local to this field) + last_week_dif_raster_field <- NULL + three_week_dif_raster_field <- NULL + + if (!is.null(field_CI) && !is.null(field_CI_m1)) { + last_week_dif_raster_field <- field_CI - field_CI_m1 + } + if (!is.null(field_CI) && !is.null(field_CI_m3)) { + three_week_dif_raster_field <- field_CI - field_CI_m3 + } + + # Call ci_plot with field-specific rasters ci_plot( pivotName = field_name, field_boundaries = AllPivots0, - current_ci = CI, - ci_minus_1 = CI_m1, - ci_minus_2 = CI_m2, - last_week_diff = last_week_dif_raster_abs, - three_week_diff = three_week_dif_raster_abs, + current_ci = field_CI, + ci_minus_1 = field_CI_m1, + ci_minus_2 = field_CI_m2, + last_week_diff = last_week_dif_raster_field, + three_week_diff = three_week_dif_raster_field, harvesting_data = harvesting_data, week = week, week_minus_1 = week_minus_1, @@ 
-882,9 +832,9 @@ tryCatch({ The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis. -```{r detailed_field_table, echo=FALSE} +```{r detailed_field_table, echo=FALSE, results='asis'} # Load CI quadrant data to get field ages -CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) +CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) # Identify the current season for each field based on report_date # The current season is the one where the report_date falls within or shortly after the season @@ -1027,7 +977,7 @@ Comparing the current season to these lines helps assess whether crop growth is \newpage ## Report Metadata -```{r report_metadata, echo=FALSE} +```{r report_metadata, echo=FALSE, results='asis'} metadata_info <- data.frame( Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"), Value = c( diff --git a/r_app/90_CI_report_with_kpis_simple_NO_TABLES.Rmd b/r_app/90_CI_report_with_kpis_simple_NO_TABLES.Rmd deleted file mode 100644 index 02ad682..0000000 --- a/r_app/90_CI_report_with_kpis_simple_NO_TABLES.Rmd +++ /dev/null @@ -1,584 +0,0 @@ ---- -params: - ref: "word-styles-reference-var1.docx" - output_file: CI_report.docx - report_date: "2025-09-30" - data_dir: "aura" - mail_day: "Wednesday" - borders: FALSE - ci_plot_type: "both" # options: "absolute", "cumulative", "both" - colorblind_friendly: TRUE # use colorblind-friendly palettes (viridis/plasma) - facet_by_season: FALSE # facet CI trend plots by season instead of overlaying - x_axis_unit: "days" # x-axis unit for trend plots: "days" or "weeks" -output: - word_document: - reference_docx: !expr file.path("word-styles-reference-var1.docx") - toc: no -editor_options: - chunk_output_type: console ---- - -```{r setup_parameters, include=FALSE} -# Set up basic report parameters 
from input values -report_date <- params$report_date -mail_day <- params$mail_day -borders <- params$borders -ci_plot_type <- params$ci_plot_type -colorblind_friendly <- params$colorblind_friendly -facet_by_season <- params$facet_by_season -x_axis_unit <- params$x_axis_unit -``` - -```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE} -# Configure knitr options -knitr::opts_chunk$set(warning = FALSE, message = FALSE) - -# Set flag for reporting scripts to use pivot.geojson instead of pivot_2.geojson -reporting_script <- TRUE - -# Load all packages at once with suppressPackageStartupMessages -suppressPackageStartupMessages({ - library(here) - library(sf) - library(terra) - library(tidyverse) - library(tmap) - library(lubridate) - library(zoo) - library(rsample) - library(caret) - library(randomForest) - library(CAST) - library(knitr) - library(tidyr) - library(flextable) - library(officer) -}) - -# Load custom utility functions -tryCatch({ - source("report_utils.R") -}, error = function(e) { - message(paste("Error loading report_utils.R:", e$message)) - # Try alternative path if the first one fails - tryCatch({ - source(here::here("r_app", "report_utils.R")) - }, error = function(e) { - stop("Could not load report_utils.R from either location: ", e$message) - }) -}) - -# Function to determine field priority level based on CV and Moran's I -# Returns: 1=Urgent, 2=Monitor, 3=No stress -get_field_priority_level <- function(cv, morans_i) { - # Handle NA values - if (is.na(cv) || is.na(morans_i)) return(3) # Default to no stress - - # Determine priority based on thresholds - if (cv < 0.1) { - if (morans_i < 0.7) { - return(3) # No stress - } else if (morans_i <= 0.9) { - return(2) # Monitor (young field with some clustering) - } else { - return(1) # Urgent - } - } else if (cv <= 0.15) { - if (morans_i < 0.7) { - return(2) # Monitor - } else { - return(1) # Urgent - } - } else { # cv > 0.15 - return(1) # Urgent - } -} -``` - -```{r initialize_project_config, 
message=FALSE, warning=FALSE, include=FALSE} -# Set the project directory from parameters -project_dir <- params$data_dir - -# Source project parameters with error handling -tryCatch({ - source(here::here("r_app", "parameters_project.R")) -}, error = function(e) { - stop("Error loading parameters_project.R: ", e$message) -}) - -# Log initial configuration -safe_log("Starting the R Markdown script with KPIs - NO TABLES VERSION") -safe_log(paste("mail_day params:", params$mail_day)) -safe_log(paste("report_date params:", params$report_date)) -safe_log(paste("mail_day variable:", mail_day)) -``` - -```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE} -## SIMPLE KPI LOADING - robust lookup with fallbacks -# Primary expected directory inside the laravel storage -kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis") -date_suffix <- format(as.Date(report_date), "%Y%m%d") - -# Calculate current week from report_date using ISO 8601 week numbering -current_week <- as.numeric(format(as.Date(report_date), "%V")) -current_year <- as.numeric(format(as.Date(report_date), "%G")) -week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) - -# Candidate filenames we expect (exact and common variants) -expected_summary_names <- c( - paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"), - paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"), - paste0(project_dir, "_kpi_summary_tables.rds"), - "kpi_summary_tables.rds", - paste0("kpi_summary_tables_", week_suffix, ".rds"), - paste0("kpi_summary_tables_", date_suffix, ".rds") -) - -expected_field_details_names <- c( - paste0(project_dir, "_field_details_", week_suffix, ".rds"), - paste0(project_dir, "_field_details_", date_suffix, ".rds"), - paste0(project_dir, "_field_details.rds"), - "field_details.rds" -) - -# Helper to attempt loading a file from the directory or fallback to a workspace-wide search -try_load_from_dir <- function(dir, 
candidates) { - if (!dir.exists(dir)) return(NULL) - for (name in candidates) { - f <- file.path(dir, name) - if (file.exists(f)) return(f) - } - return(NULL) -} - -# Try primary directory first -summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names) -field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names) - -# If not found, perform a workspace-wide search (slower) limited to laravel_app storage -if (is.null(summary_file) || is.null(field_details_file)) { - safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files")) - # List rds files under laravel_app/storage/app recursively - files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE) - # Try to match by expected names - if (is.null(summary_file)) { - matched <- files[basename(files) %in% expected_summary_names] - if (length(matched) > 0) summary_file <- matched[1] - } - if (is.null(field_details_file)) { - matched2 <- files[basename(files) %in% expected_field_details_names] - if (length(matched2) > 0) field_details_file <- matched2[1] - } -} - -# Final checks and load with safe error messages -kpi_files_exist <- FALSE -if (!is.null(summary_file) && file.exists(summary_file)) { - safe_log(paste("Loading KPI summary from:", summary_file)) - summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL }) - if (!is.null(summary_tables)) kpi_files_exist <- TRUE -} else { - safe_log(paste("KPI summary file not found. 
Searched:", paste(expected_summary_names, collapse=", ")), "WARNING") -} - -if (!is.null(field_details_file) && file.exists(field_details_file)) { - safe_log(paste("Loading field details from:", field_details_file)) - field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL }) - if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE -} else { - safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING") -} - -if (kpi_files_exist) { - safe_log("✓ KPI summary tables loaded successfully") -} else { - safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING") -} - -#' Generate field-specific KPI summary for display in reports -#' @param field_name Name of the field to summarize -#' @param field_details_table Data frame with field-level KPI details -#' @return Formatted text string with field KPI summary -generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadrant) { - tryCatch({ - # Get field age from CI quadrant data for the CURRENT SEASON only - # First identify the current season for this field - current_season <- CI_quadrant %>% - filter(field == field_name, Date <= as.Date(report_date)) %>% - group_by(season) %>% - summarise(season_end = max(Date), .groups = 'drop') %>% - filter(season == max(season)) %>% - pull(season) - - # Get the most recent DOY from the current season - field_age <- CI_quadrant %>% - filter(field == field_name, season == current_season) %>% - pull(DOY) %>% - max(na.rm = TRUE) - - # Filter data for this specific field - field_data <- field_details_table %>% - filter(Field == field_name) - - if (nrow(field_data) == 0) { - return(paste("**Field", field_name, "KPIs:** Data not available")) - } - - # Aggregate sub-field data for field-level summary - # For categorical data, take the most common value 
or highest risk level - field_summary <- field_data %>% - summarise( - field_size = sum(`Field Size (ha)`, na.rm = TRUE), - uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"), - avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)), - max_gap_score = max(`Gap Score`, na.rm = TRUE), - highest_decline_risk = case_when( - any(`Decline Risk` == "Very-high") ~ "Very-high", - any(`Decline Risk` == "High") ~ "High", - any(`Decline Risk` == "Moderate") ~ "Moderate", - any(`Decline Risk` == "Low") ~ "Low", - TRUE ~ "Unknown" - ), - highest_weed_risk = case_when( - any(`Weed Risk` == "High") ~ "High", - any(`Weed Risk` == "Moderate") ~ "Moderate", - any(`Weed Risk` == "Low") ~ "Low", - TRUE ~ "Unknown" - ), - avg_mean_ci = mean(`Mean CI`, na.rm = TRUE), - avg_cv = mean(`CV Value`, na.rm = TRUE), - .groups = 'drop' - ) - - # Apply age-based filtering to yield forecast - if (is.na(field_age) || field_age < 240) { - field_summary$avg_yield_forecast <- NA_real_ - } - - # Format the summary text - yield_text <- if (is.na(field_summary$avg_yield_forecast)) { - "Yield Forecast: NA" - } else { - paste0("Yield Forecast: ", round(field_summary$avg_yield_forecast, 1), " t/ha") - } - - kpi_text <- paste0( - "Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels, - " | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1), - " | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk, - " | Mean CI: ", round(field_summary$avg_mean_ci, 2) - ) - - kpi_text <- paste0("", kpi_text, "") - - return(kpi_text) - - }, error = function(e) { - safe_log(paste("Error generating KPI summary for field", field_name, ":", e$message), "ERROR") - return(paste("**Field", field_name, "KPIs:** Error generating summary")) - }) -} -``` - -```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE} -# 
Set locale for consistent date formatting -Sys.setlocale("LC_TIME", "C") - -# Initialize date variables from parameters -today <- as.character(report_date) -mail_day_as_character <- as.character(mail_day) - -# Calculate report dates and weeks using ISO 8601 week numbering -report_date_obj <- as.Date(today) -current_week <- as.numeric(format(report_date_obj, "%V")) -year <- as.numeric(format(report_date_obj, "%Y")) - -# Calculate dates for weekly analysis -week_start <- report_date_obj - ((as.numeric(format(report_date_obj, "%w")) + 1) %% 7) -week_end <- week_start + 6 - -# Calculate week days (copied from 05 script for compatibility) -report_date_as_week_day <- weekdays(lubridate::ymd(today)) -days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday") - -# Calculate initial week number -week <- lubridate::week(today) -safe_log(paste("Initial week calculation:", week, "today:", today)) - -# Calculate previous dates for comparisons -today_minus_1 <- as.character(lubridate::ymd(today) - 7) -today_minus_2 <- as.character(lubridate::ymd(today) - 14) -today_minus_3 <- as.character(lubridate::ymd(today) - 21) - -# Adjust week calculation based on mail day -if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) { - safe_log("Adjusting weeks because of mail day") - week <- lubridate::week(today) + 1 - today_minus_1 <- as.character(lubridate::ymd(today)) - today_minus_2 <- as.character(lubridate::ymd(today) - 7) - today_minus_3 <- as.character(lubridate::ymd(today) - 14) -} - -# Calculate week numbers for previous weeks -week_minus_1 <- week - 1 -week_minus_2 <- week - 2 -week_minus_3 <- week - 3 - -# Format current week with leading zeros -week <- sprintf("%02d", week) - -safe_log(paste("Report week:", current_week, "Year:", year)) -safe_log(paste("Week range:", week_start, "to", week_end)) -``` - -```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} -# Load CI quadrant data for 
field-level analysis -tryCatch({ - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) - safe_log("Successfully loaded CI quadrant data") -}, error = function(e) { - stop("Error loading CI quadrant data: ", e$message) -}) - -# NOTE: Overview maps skipped for this report -# Individual field sections load their own per-field mosaics directly -``` -``` - -```{r compute_benchmarks_once, include=FALSE} -# Compute CI benchmarks once for the entire estate -benchmarks <- compute_ci_benchmarks(CI_quadrant, project_dir, c(10, 50, 90)) -if (!is.null(benchmarks)) { - safe_log("Benchmarks computed successfully for the report") -} else { - safe_log("Failed to compute benchmarks", "WARNING") -} -``` - -## Report Summary - -**Farm Location:** `r toupper(project_dir)` Estate -**Report Period:** Week `r current_week` of `r year` -**Data Source:** Planet Labs Satellite Imagery -**Analysis Type:** Chlorophyll Index (CI) Monitoring -**Report Generated on:** `r format(Sys.time(), "%B %d, %Y at %H:%M")` - -**NOTE: THIS IS A NO-TABLES VERSION FOR DIAGNOSTIC PURPOSES - MAPS AND GRAPHS ONLY** - -\newpage - -# Section 2: Field-by-Field Analysis - -## Overview of Field-Level Insights -This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions. 
- -**Key Elements per Field:** -- Current and historical CI maps -- Week-over-week change visualizations -- Cumulative growth trends -- Field-specific KPI summaries - -*Navigate to the following pages for individual field reports.* - -\newpage - -```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE} -# Load field boundaries from parameters -tryCatch({ - AllPivots0 <- field_boundaries_sf %>% - dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names - safe_log("Successfully loaded field boundaries") - - # Prepare merged field list for use in summaries - AllPivots_merged <- AllPivots0 %>% - dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names - dplyr::group_by(field) %>% - dplyr::summarise(.groups = 'drop') - -}, error = function(e) { - stop("Error loading field boundaries: ", e$message) -}) -``` - -```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, dpi=300, dev='png', message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'} -# Generate detailed visualizations for each field -tryCatch({ - # Merge field polygons for processing and filter out NA field names - AllPivots_merged <- AllPivots0 %>% - dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA fields - dplyr::group_by(field) %>% - dplyr::summarise(.groups = 'drop') - - # Use per-field weekly mosaic directory path from parameters_project.R - weekly_mosaic_per_field_dir <- weekly_CI_mosaic - - # Helper to get week/year from a date - get_week_year <- function(date) { - list( - week = as.numeric(format(date, "%V")), - year = as.numeric(format(date, "%G")) - ) - } - - # Get week/year for current and historical weeks (local to field section) - current_ww <- get_week_year(as.Date(today)) - minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1)) - minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2)) - minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3)) - - # Debug: 
check how many fields we're iterating - safe_log(paste("Starting visualization loop for", nrow(AllPivots_merged), "fields"), "DEBUG") - safe_log(paste("Fields to process:", paste(AllPivots_merged$field, collapse=", ")), "DEBUG") - - # Generate plots for each field - for(i in seq_along(AllPivots_merged$field)) { - field_name <- AllPivots_merged$field[i] - safe_log(paste("Processing field", i, "of", nrow(AllPivots_merged), ":", field_name), "DEBUG") - - # Skip if field_name is still NA (double check) - if(is.na(field_name)) { - safe_log(paste("Skipping field", i, "- NA name"), "DEBUG") - next - } - - tryCatch({ - # Add page break before each field (except the first one) - if(i > 1) { - cat("\\newpage\n\n") - } - - # Load per-field mosaics directly for this field - field_CI <- NULL - field_CI_m1 <- NULL - field_CI_m2 <- NULL - field_CI_m3 <- NULL - - tryCatch({ - # Load per-field mosaic for current week - per_field_path_current <- get_per_field_mosaic_path( - weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year - ) - safe_log(paste("Looking for mosaic at:", per_field_path_current, "exists?", file.exists(per_field_path_current %||% "")), "DEBUG") - if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) { - field_CI <- terra::rast(per_field_path_current)[["CI"]] - safe_log(paste("Successfully loaded field_CI for", field_name), "DEBUG") - } else { - safe_log(paste("Could not load field_CI for", field_name, "- file not found"), "DEBUG") - } - - # Load per-field mosaic for week-1 - per_field_path_m1 <- get_per_field_mosaic_path( - weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year - ) - if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) { - field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]] - } - - # Load per-field mosaic for week-2 - per_field_path_m2 <- get_per_field_mosaic_path( - weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year - ) - if 
(!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) { - field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]] - } - - # Load per-field mosaic for week-3 - per_field_path_m3 <- get_per_field_mosaic_path( - weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year - ) - if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) { - field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]] - } - - safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG") - - }, error = function(e) { - safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING") - }) - - # Calculate difference rasters from per-field data (local to this field) - last_week_dif_raster_field <- NULL - three_week_dif_raster_field <- NULL - - if (!is.null(field_CI) && !is.null(field_CI_m1)) { - last_week_dif_raster_field <- field_CI - field_CI_m1 - } - if (!is.null(field_CI) && !is.null(field_CI_m3)) { - three_week_dif_raster_field <- field_CI - field_CI_m3 - } - - # Call ci_plot with field-specific rasters - ci_plot( - pivotName = field_name, - field_boundaries = AllPivots0, - current_ci = field_CI, - ci_minus_1 = field_CI_m1, - ci_minus_2 = field_CI_m2, - last_week_diff = last_week_dif_raster_field, - three_week_diff = three_week_dif_raster_field, - harvesting_data = harvesting_data, - week = week, - week_minus_1 = week_minus_1, - week_minus_2 = week_minus_2, - week_minus_3 = week_minus_3, - borders = borders, - colorblind_friendly = colorblind_friendly - ) - - cat("\n\n") - - # Special handling for ESA project field 00f25 - remove duplicate DOY values - if (project_dir == "esa" && field_name == "00F25") { - ci_quadrant_data <- CI_quadrant %>% - filter(field == "00F25") %>% - arrange(DOY) %>% - group_by(DOY) %>% - slice(1) %>% - ungroup() - } else { - ci_quadrant_data <- CI_quadrant - } - - # Call cum_ci_plot with explicit parameters - cum_ci_plot( - pivotName = field_name, - ci_quadrant_data = ci_quadrant_data, - plot_type = 
ci_plot_type, - facet_on = facet_by_season, - x_unit = x_axis_unit, - colorblind_friendly = colorblind_friendly, - show_benchmarks = TRUE, - estate_name = project_dir, - benchmark_percentiles = c(10, 50, 90), - benchmark_data = benchmarks - ) - - cat("\n\n") - - # Add field-specific KPI summary under the graphs - if (exists("field_details_table") && !is.null(field_details_table)) { - kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant) - cat(kpi_summary) - cat("\n\n") - } - - }, error = function(e) { - safe_log(paste("Error generating plots for field", field_name, ":", e$message), "ERROR") - cat("\\newpage\n\n") - cat("# Error generating plots for field ", field_name, "\n\n") - cat(e$message, "\n\n") - }) - } -}, error = function(e) { - safe_log(paste("Error in field visualization section:", e$message), "ERROR") - cat("Error generating field plots. See log for details.\n\n") -}) -``` - -\newpage - -# END OF NO-TABLES DIAGNOSTIC REPORT - -This diagnostic report contains only maps and graphs to help identify if the visualization system is working correctly. 
- -*Generated for diagnostic purposes* diff --git a/r_app/91_CI_report_with_kpis_Angata.Rmd b/r_app/91_CI_report_with_kpis_Angata.Rmd index c370d8e..a6ca227 100644 --- a/r_app/91_CI_report_with_kpis_Angata.Rmd +++ b/r_app/91_CI_report_with_kpis_Angata.Rmd @@ -2,7 +2,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: CI_report.docx - report_date: "2026-01-25" + report_date: "2025-09-30" data_dir: "angata" mail_day: "Wednesday" borders: FALSE @@ -38,20 +38,25 @@ reporting_script <- TRUE # Load all packages at once with suppressPackageStartupMessages suppressPackageStartupMessages({ - library(here) - library(sf) - library(terra) - library(tidyverse) - library(tmap) - library(lubridate) - library(zoo) - library(rsample) - library(caret) - library(randomForest) - library(CAST) - library(knitr) - library(tidyr) - library(flextable) + # File path handling + library(here) # For relative path resolution (platform-independent file paths) + + # Spatial data handling + library(sf) # For reading/manipulating field boundaries (GeoJSON) + library(terra) # For raster operations (reading mosaic TIFFs for visualization) + + # Data manipulation + library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization) + library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion) + library(lubridate) # For date/time operations (week extraction, date formatting) + library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling) + + # Visualization + library(tmap) # For interactive maps (field boundary visualization) + + # Reporting + library(knitr) # For R Markdown document generation (code execution and output) + library(flextable) # For formatted tables in Word output (professional table styling) }) # Load custom utility functions @@ -105,6 +110,16 @@ tryCatch({ stop("Error loading parameters_project.R: ", e$message) }) +# Source common utilities for logging and helper functions +tryCatch({ + 
source(here::here("r_app", "00_common_utils.R")) +}, error = function(e) { + stop("Error loading 00_common_utils.R: ", e$message) +}) + +# Load centralized paths +paths <- setup_project_directories(project_dir) + # Log initial configuration safe_log("Starting the R Markdown script with KPIs") safe_log(paste("mail_day params:", params$mail_day)) @@ -112,16 +127,23 @@ safe_log(paste("report_date params:", params$report_date)) safe_log(paste("mail_day variable:", mail_day)) ``` -```{r load_kpi_data, eval=TRUE, message=FALSE, warning=FALSE, include=FALSE} +```{r load_kpi_data, message=FALSE, warning=FALSE} ## SIMPLE KPI LOADING - robust lookup with fallbacks -# Primary expected directory inside the laravel storage -kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis") + +# First, show working directory for debugging +cat("\n=== DEBUG: R Markdown Working Directory ===\n") +cat(paste("getwd():", getwd(), "\n")) +cat(paste("Expected knit_dir from R Markdown:", knitr::opts_knit$get("root.dir"), "\n\n")) + +# Primary expected directory from centralized paths +kpi_data_dir <- paths$kpi_reports_dir date_suffix <- format(as.Date(report_date), "%Y%m%d") # Calculate current week from report_date using ISO 8601 week numbering current_week <- as.numeric(format(as.Date(report_date), "%V")) -current_iso_year <- as.numeric(format(as.Date(report_date), "%G")) -week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year) +current_year <- as.numeric(format(as.Date(report_date), "%G")) +week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) + # Candidate filenames we expect (exact and common variants) expected_summary_names <- c( paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"), @@ -171,30 +193,69 @@ if (is.null(summary_file) || is.null(field_details_file)) { # Final checks and load with safe error messages kpi_files_exist <- FALSE + +# Debug: log what we're looking for +cat("\n=== 
KPI LOADING DEBUG ===\n") +cat(paste("Working directory:", getwd(), "\n")) +cat(paste("project_dir:", project_dir, "\n")) +cat(paste("report_date:", report_date, "\n")) +cat(paste("Calculated week:", current_week, "year:", current_year, "\n")) +cat(paste("Looking for KPI files in:", kpi_data_dir, "\n")) +cat(paste("Directory exists:", dir.exists(kpi_data_dir), "\n")) +cat(paste("Expected filenames to match:\n")) +for (name in expected_summary_names) cat(paste(" -", name, "\n")) + +# List what's actually in the directory +if (dir.exists(kpi_data_dir)) { + actual_files <- list.files(kpi_data_dir, pattern = ".*\\.rds$", full.names = FALSE) + cat(paste("Files in KPI directory (", length(actual_files), " total):\n")) + for (f in actual_files) cat(paste(" -", f, "\n")) +} else { + cat("KPI directory does NOT exist!\n") +} + if (!is.null(summary_file) && file.exists(summary_file)) { - safe_log(paste("Loading KPI summary from:", summary_file)) - summary_data <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL }) + cat(paste("✓ FOUND summary file:", summary_file, "\n")) + cat(paste(" File size:", file.size(summary_file), "bytes\n")) + summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL }) - # Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format if (!is.null(summary_data)) { + cat(paste(" ✓ Loaded successfully. 
Class:", class(summary_data), "\n")) + if (is.list(summary_data)) { + cat(paste(" List names:", paste(names(summary_data), collapse = ", "), "\n")) + } + + # Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format if (is.list(summary_data) && !is.data.frame(summary_data)) { # New format from 09_field_analysis_weekly.R - just pass it through if ("field_analysis_summary" %in% names(summary_data)) { + cat(" ✓ Found field_analysis_summary in list - will use this structure\n") # Keep the new structure intact - combined_kpi_table will use it directly kpi_files_exist <- TRUE } else { + cat(" ! Old format detected\n") # Old format - keep as is summary_tables <- summary_data if (!is.null(summary_tables)) kpi_files_exist <- TRUE } } else { + cat(" ! Data frame format\n") # Data frame format or direct tables summary_tables <- summary_data if (!is.null(summary_tables)) kpi_files_exist <- TRUE } + } else { + cat(" ✗ Failed to load RDS - summary_data is NULL\n") } } else { safe_log(paste("KPI summary file not found. 
Searched:", paste(expected_summary_names, collapse=", ")), "WARNING") + safe_log(paste("Attempted directory:", kpi_data_dir), "WARNING") + # Try searching the entire workspace as fallback + files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "kpi.*\\.rds$", recursive = TRUE, full.names = TRUE) + safe_log(paste("Found", length(files), "KPI RDS files in workspace"), "INFO") + if (length(files) > 0) { + safe_log(paste("Available files:", paste(basename(files), collapse = ", ")), "INFO") + } } if (!is.null(field_details_file) && file.exists(field_details_file)) { @@ -427,7 +488,7 @@ safe_log(paste("Week range:", week_start, "to", week_end)) ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE, eval=TRUE} # Load CI index data with error handling tryCatch({ - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) + CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) safe_log("Successfully loaded CI quadrant data") }, error = function(e) { @@ -683,125 +744,185 @@ tryCatch({ \newpage ## 1.2 Key Performance Indicators -```{r combined_kpi_table, echo=FALSE, eval=TRUE} +```{r combined_kpi_table, echo=TRUE} +# Debug: check what variables exist +cat("\n=== DEBUG: combined_kpi_table chunk ===\n") +cat(paste("exists('summary_data'):", exists("summary_data"), "\n")) +cat(paste("exists('kpi_files_exist'):", exists("kpi_files_exist"), "\n")) +if (exists("kpi_files_exist")) { + cat(paste("kpi_files_exist value:", kpi_files_exist, "\n")) +} +if (exists("summary_data")) { + cat(paste("summary_data class:", class(summary_data), "\n")) + if (is.list(summary_data)) { + cat(paste("summary_data names:", paste(names(summary_data), collapse = ", "), "\n")) + cat(paste("has field_analysis_summary:", "field_analysis_summary" %in% names(summary_data), "\n")) + } +} else { + cat("summary_data DOES NOT EXIST in this chunk's environment!\n") +} 
+cat("\n") + # Create summary KPI table from field_analysis_summary data # This shows: Phases, Triggers, Area Change, and Total Farm acreage -if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary" %in% names(summary_data)) { - field_analysis_summary <- summary_data$field_analysis_summary +if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { + # Load field analysis data field_analysis_df <- summary_data$field_analysis + # If field_analysis_summary is NULL or doesn't exist, create it from field_analysis_df + if (is.null(summary_data$field_analysis_summary) || !("field_analysis_summary" %in% names(summary_data)) || + !is.data.frame(summary_data$field_analysis_summary)) { + cat("\nNote: field_analysis_summary not in RDS, creating from field_analysis...\n") + + # Create summary by aggregating by Status_Alert and Phase categories + # This groups fields by their phase and status to show distribution + phase_summary <- field_analysis_df %>% + filter(!is.na(Phase)) %>% + group_by(Phase) %>% + summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>% + mutate(Category = Phase) %>% + select(Category, Acreage) + + # Try to create Status trigger summary - use Status_Alert if available, otherwise use empty + trigger_summary <- tryCatch({ + field_analysis_df %>% + filter(!is.na(Status_Alert), Status_Alert != "") %>% + group_by(Status_Alert) %>% + summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>% + mutate(Category = Status_Alert) %>% + select(Category, Acreage) + }, error = function(e) { + cat("Could not create trigger summary:", e$message, "\n") + data.frame(Category = character(), Acreage = numeric()) + }) + + # Combine into summary + field_analysis_summary <- bind_rows(phase_summary, trigger_summary) + + cat(paste("Created summary with", nrow(field_analysis_summary), "category rows\n")) + + } else { + # Use existing summary from RDS + field_analysis_summary <- 
summary_data$field_analysis_summary + } + # Phase names and trigger names to extract from summary phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase") trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected", - "Germination Complete", "Germination Started", "No Active Trigger") + "Germination Complete", "Germination Started", "No Active Trigger", + "Ready for harvest-check", "Strong decline in crop health", "Harvested/bare") # Extract phase distribution - match on category names directly - phase_rows <- field_analysis_summary %>% - filter(Category %in% phase_names) %>% - select(Category, Acreage) %>% - mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1) + if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) { + phase_rows <- field_analysis_summary %>% + filter(Category %in% phase_names) %>% + select(Category, Acreage) %>% + mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1) - # Extract status triggers - match on category names directly - trigger_rows <- field_analysis_summary %>% - filter(Category %in% trigger_names) %>% - select(Category, Acreage) %>% - mutate(KPI_Group = "STATUS TRIGGERS", .before = 1) + # Extract status triggers - match on category names directly + trigger_rows <- field_analysis_summary %>% + filter(Category %in% trigger_names) %>% + select(Category, Acreage) %>% + mutate(KPI_Group = "STATUS TRIGGERS", .before = 1) - # Calculate area change from field_analysis data - total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE) - - # Parse Weekly_ci_change to determine improvement/decline - parse_ci_change <- function(change_str) { - if (is.na(change_str)) return(NA) - match <- regexpr("^[+-]?[0-9]+\\.?[0-9]*", change_str) - if (match > 0) { - return(as.numeric(substr(change_str, match, attr(match, "match.length")))) + # Calculate area change from field_analysis data + total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE) + + # Parse 
Weekly_ci_change to determine improvement/decline + parse_ci_change <- function(change_str) { + if (is.na(change_str)) return(NA) + match <- regexpr("^[+-]?[0-9]+\\.?[0-9]*", change_str) + if (match > 0) { + return(as.numeric(substr(change_str, match, attr(match, "match.length")))) + } + return(NA) } - return(NA) + + field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change) + + improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE) + declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE) + stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 & + field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE) + + improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0) + declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0) + stable_pct <- ifelse(total_acreage > 0, round(stable_acreage / total_acreage * 100, 1), 0) + + # Calculate percentages for phases and triggers + phase_pcts <- phase_rows %>% + mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) + + trigger_pcts <- trigger_rows %>% + mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) + + area_change_rows <- data.frame( + KPI_Group = "AREA CHANGE", + Category = c("Improving", "Stable", "Declining"), + Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)), + Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")), + stringsAsFactors = FALSE + ) + + # Total farm row + total_row <- data.frame( + KPI_Group = "TOTAL FARM", + Category = "Total Acreage", + Acreage = round(total_acreage, 2), + Percent = "100%", + stringsAsFactors = FALSE + ) + + # Combine all rows with percentages for all + combined_df <- bind_rows( + phase_pcts, + 
trigger_pcts, + area_change_rows, + total_row + ) + + # Create grouped display where KPI_Group name appears only once per group + combined_df <- combined_df %>% + group_by(KPI_Group) %>% + mutate( + KPI_display = if_else(row_number() == 1, KPI_Group, "") + ) %>% + ungroup() %>% + select(KPI_display, Category, Acreage, Percent) + + # Render as flextable with merged cells + ft <- flextable(combined_df) %>% + set_header_labels( + KPI_display = "KPI Category", + Category = "Item", + Acreage = "Acreage", + Percent = "Percent" + ) %>% + merge_v(j = "KPI_display") %>% + autofit() + + # Add horizontal lines after each KPI group (at cumulative row positions) + # Calculate row positions: row 1 is header, then data rows follow + phase_count <- nrow(phase_rows) + trigger_count <- nrow(trigger_rows) + area_count <- nrow(area_change_rows) + + # Add lines after phases, triggers, and area change groups (before totals) + if (phase_count > 0) { + ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1)) + } + if (trigger_count > 0) { + ft <- ft %>% hline(i = phase_count + trigger_count, border = officer::fp_border(width = 1)) + } + if (area_count > 0) { + ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1)) + } + + ft + } else { + cat("KPI summary data available but is empty/invalid.\n") } - - field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change) - - improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE) - declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE) - stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 & - field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE) - - improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0) - declining_pct <- ifelse(total_acreage > 0, 
round(declining_acreage / total_acreage * 100, 1), 0) - stable_pct <- ifelse(total_acreage > 0, round(stable_acreage / total_acreage * 100, 1), 0) - - # Calculate percentages for phases and triggers - phase_pcts <- phase_rows %>% - mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) - - trigger_pcts <- trigger_rows %>% - mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) - - area_change_rows <- data.frame( - KPI_Group = "AREA CHANGE", - Category = c("Improving", "Stable", "Declining"), - Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)), - Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")), - stringsAsFactors = FALSE - ) - - # Total farm row - total_row <- data.frame( - KPI_Group = "TOTAL FARM", - Category = "Total Acreage", - Acreage = round(total_acreage, 2), - Percent = "100%", - stringsAsFactors = FALSE - ) - - # Combine all rows with percentages for all - combined_df <- bind_rows( - phase_pcts, - trigger_pcts, - area_change_rows, - total_row - ) - - # Create grouped display where KPI_Group name appears only once per group - combined_df <- combined_df %>% - group_by(KPI_Group) %>% - mutate( - KPI_display = if_else(row_number() == 1, KPI_Group, "") - ) %>% - ungroup() %>% - select(KPI_display, Category, Acreage, Percent) - - # Render as flextable with merged cells - ft <- flextable(combined_df) %>% - set_header_labels( - KPI_display = "KPI Category", - Category = "Item", - Acreage = "Acreage", - Percent = "Percent" - ) %>% - merge_v(j = "KPI_display") %>% - autofit() - - # Add horizontal lines after each KPI group (at cumulative row positions) - # Calculate row positions: row 1 is header, then data rows follow - phase_count <- nrow(phase_rows) - trigger_count <- nrow(trigger_rows) - area_count <- nrow(area_change_rows) - - # Add lines after phases, triggers, and area change groups (before totals) - if (phase_count > 0) { - ft <- ft %>% 
hline(i = phase_count, border = officer::fp_border(width = 1)) - } - if (trigger_count > 0) { - ft <- ft %>% hline(i = phase_count + trigger_count, border = officer::fp_border(width = 1)) - } - if (area_count > 0) { - ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1)) - } - - ft } else { cat("KPI summary data not available.\n") } @@ -854,11 +975,11 @@ if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) { if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { field_analysis_table <- summary_data$field_analysis - # Extract fields with status triggers (non-null) + # Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger) alerts_data <- field_analysis_table %>% - filter(!is.na(Status_trigger), Status_trigger != "") %>% - select(Field_id, Status_trigger) %>% - rename(Field = Field_id, Alert = Status_trigger) + filter(!is.na(Status_Alert), Status_Alert != "") %>% + select(Field_id, Status_Alert) %>% + rename(Field = Field_id, Alert = Status_Alert) if (nrow(alerts_data) > 0) { # Format alert messages for display @@ -892,22 +1013,50 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na # The report renders KPI tables and field summaries from that data ``` -```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE, eval=TRUE} -# Load field boundaries from parameters -field_boundaries_sf <- sf::st_make_valid(field_boundaries_sf) +```{r load_field_boundaries, message=FALSE, warning=FALSE, include=FALSE} +# Load field boundaries from parameters (with fallback if geometry is invalid) +field_boundaries_loaded <- FALSE + tryCatch({ - AllPivots0 <- field_boundaries_sf %>% - dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names - safe_log("Successfully loaded field boundaries") - - # Prepare merged field list for use in summaries - AllPivots_merged <- AllPivots0 %>% - 
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names - dplyr::group_by(field) %>% - dplyr::summarise(.groups = 'drop') - + # Try to load and validate the field boundaries + if (exists("field_boundaries_sf") && !is.null(field_boundaries_sf)) { + # Try to filter - this will trigger geometry validation + AllPivots0 <- field_boundaries_sf %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) + + # If successful, also create merged field list + AllPivots_merged <- AllPivots0 %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) %>% + dplyr::group_by(field) %>% + dplyr::summarise(.groups = 'drop') + + field_boundaries_loaded <- TRUE + safe_log("✓ Successfully loaded field boundaries") + } else { + safe_log("⚠ field_boundaries_sf not found in environment") + } }, error = function(e) { - stop("Error loading field boundaries: ", e$message) + # If geometry is invalid, try to fix or skip + safe_log(paste("⚠ Error loading field boundaries:", e$message), "WARNING") + safe_log("Attempting to fix invalid geometries using st_make_valid()...", "WARNING") + + tryCatch({ + # Try to repair invalid geometries + field_boundaries_sf_fixed <<- sf::st_make_valid(field_boundaries_sf) + AllPivots0 <<- field_boundaries_sf_fixed %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) + + AllPivots_merged <<- AllPivots0 %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) %>% + dplyr::group_by(field) %>% + dplyr::summarise(.groups = 'drop') + + field_boundaries_loaded <<- TRUE + safe_log("✓ Fixed invalid geometries and loaded field boundaries") + }, error = function(e2) { + safe_log(paste("⚠ Could not repair geometries:", e2$message), "WARNING") + safe_log("Continuing without field boundary data", "WARNING") + }) }) ``` \newpage diff --git a/r_app/MANUAL_PIPELINE_RUNNER.R b/r_app/MANUAL_PIPELINE_RUNNER.R new file mode 100644 index 0000000..2cceb43 --- /dev/null +++ b/r_app/MANUAL_PIPELINE_RUNNER.R @@ -0,0 +1,631 @@ +# 
============================================================================== +# SMARTCANE MANUAL PIPELINE RUNNER +# ============================================================================== +# +# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS. +# Do NOT run this script directly - instead, copy individual commands and +# paste them into your PowerShell terminal. +# +# This approach allows you to: +# - Run steps one at a time and inspect outputs +# - Re-run failed steps without re-running successful ones +# - Monitor progress between steps +# - Troubleshoot issues more easily than with automated pipeline +# +# ============================================================================== +# PIPELINE SEQUENCE (IN ORDER) +# ============================================================================== +# +# 1. Python: Download Planet satellite imagery (optional - only if new data needed) +# 2. R10: Split farm TIFFs into per-field directory structure +# 3. R20: Extract Chlorophyll Index (CI) from 4-band imagery +# 4. R30: Interpolate growth model (smooth CI time series) +# 5. R21: Convert CI data to CSV format for Python +# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx) +# 7. R40: Create weekly mosaic TIFFs +# 8. R80: Calculate KPIs (field uniformity, trends, stress) +# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply) +# +# ============================================================================== +# BEFORE YOU START +# ============================================================================== +# +# 1. Open PowerShell in the smartcane root directory: +# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\ +# +# 2. 
Define your parameters ONCE at the top of the session: +# +# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba +# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04) +# $OFFSET = 7 # Days to look back (e.g., 7 for one week) +# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE +# $YEAR = 2026 # ISO year - auto-calculated from END_DATE +# +# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc. +# +# ============================================================================== +# COMMAND REFERENCE +# ============================================================================== + +# ============================================================================== +# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL) +# ============================================================================== +# +# PURPOSE: +# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API +# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif +# +# WHEN TO RUN: +# - Only needed if you have new dates to process +# - Pipeline skips dates already in merged_tif/ or field_tiles/ +# - First-time setup: download for your date range +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# DATE: YYYY-MM-DD format (e.g., 2026-02-04) +# RESOLUTION: 3 meters (default) - can also use 5, 10 +# --cleanup: Delete intermediate files after download +# --clear-all: Clear all output folders before downloading +# +# COMMAND #1 - Single Date Download: +# +# cd python_app +# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup +# +# Example: +# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup +# +# COMMAND #2 - Batch Download (Multiple Dates): +# For date ranges, MUST use download_planet_missing_dates.py (not Script 00) +# +# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] 
--project [PROJECT] +# +# Example: +# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata +# +# IMPORTANT DISTINCTION: +# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates +# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES +# Script 00 does NOT have --start/--end flags despite documentation suggestion +# Use the correct script for your use case! +# +# EXPECTED OUTPUT: +# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file) +# +# Note: Planet API requires authentication (PLANET_API_KEY environment variable) +# Cost: ~1,500-2,000 PU per date +# +# ============================================================================ + + +# ============================================================================== +# STEP 1: R10 - Create Per-Field TIFF Structure +# ============================================================================== +# +# PURPOSE: +# Split farm-wide GeoTIFFs into per-field directory structure. +# Transforms: merged_tif/{DATE}.tif (single file) +# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files) +# This enables clean, scalable processing in downstream scripts. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR) +# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif +# - One TIFF per field per date (1185 fields × N dates in Angata) +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba (default: angata) +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today) +# OFFSET: Days to look back (e.g., 7 for one week, default: 7) +# +# COMMAND #1 - Default (All dates, current date, 7-day window): +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata +# +# COMMAND #2 - Specific Date Range: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET] +# +# Example (one week back from 2026-02-09): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7 +# +# Example (two weeks back from 2026-02-09): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14 +# +# EXPECTED OUTPUT: +# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files) +# Storage location: laravel_app/storage/app/angata/field_tiles/ +# Script execution time: 5-10 minutes (depends on number of dates) +# +# ============================================================================ + + +# ============================================================================== +# STEP 2: R20 - Extract Chlorophyll Index (CI) +# ============================================================================== +# +# PURPOSE: +# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs. +# Also extracts CI statistics per sub_field for daily tracking. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI) +# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds +# +# EXPECTED BEHAVIOR: +# If field_tiles_CI/ or daily_vals/ missing files, Script 20 will process them +# Script 20 skips files that already exist (to avoid re-processing) +# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end +# OFFSET: Days to look back (e.g., 7 for one week window) +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7 +# +# EXPECTED OUTPUT: +# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/ +# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/ +# Storage location: laravel_app/storage/app/angata/field_tiles_CI/ +# Script execution time: 10-20 minutes (depends on number of dates+fields) +# +# NOTES: +# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE +# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates) +# To process all existing merged_tif files: Use large OFFSET (e.g., 365) +# +# ============================================================================ + + +# ============================================================================== +# STEP 3: R30 - Interpolate Growth Model +# ============================================================================== +# +# PURPOSE: +# Smooth CI time series using LOESS interpolation to fill gaps. 
+# Creates continuous growth curves for each field across all measurement dates. +# Enables trend analysis, yield prediction, and cumulative growth metrics. +# +# INPUT: +# - Daily CI statistics from Script 20 (field_tiles_CI/ per-field RDS files) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# - (This is the growth model output used by Script 21 and 80) +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata +# +# EXPECTED OUTPUT: +# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds +# Contains: Interpolated CI data for all fields (wide format) +# +# ============================================================================ + + +# ============================================================================== +# STEP 4: R21 - Convert CI RDS to CSV (Python Format) +# ============================================================================== +# +# PURPOSE: +# Convert growth model output from R's RDS format to Python-compatible CSV. +# Transforms from wide format (fields × dates) to long format (one row per field-date pair). +# Prepares data for Python harvest detection models. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# (Output from Script 30) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv +# - Columns: field, sub_field, Date, FitData, DOY, value +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata +# +# EXPECTED OUTPUT: +# File: ci_data_for_python.csv (~5-10 MB) +# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows) +# +# ============================================================================ + + +# ============================================================================== +# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL) +# ============================================================================== +# +# PURPOSE: +# Predict which fields are approaching harvest in the next 28 days. +# Uses neural network (Model 307) trained on historical harvest dates. +# Generates weekly probability scores for operational harvest scheduling. 
+# +# REQUIRES: +# - harvest.xlsx with field planting/harvest dates +# - ci_data_for_python.csv from Script 21 +# - PyTorch environment (conda pytorch_gpu) +# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx +# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv +# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT] +# +# Example: +# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata +# +# EXPECTED OUTPUT: +# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv +# Rows: One per field (e.g., 1185 rows for Angata) +# +# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete +# +# ============================================================================ + + +# ============================================================================== +# STEP 6: R40 - Create Weekly Mosaic TIFFs +# ============================================================================== +# +# PURPOSE: +# Aggregate daily per-field CI TIFFs into weekly mosaics. +# Handles multiple dates (full week) with maximum CI value per pixel. +# Creates 5-band output for reporting and KPI calculations. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif +# (Daily per-field CI TIFFs from Script 20) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif +# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files) +# +# PARAMETERS: +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week +# OFFSET: Days to look back (e.g., 7 for one week window) +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT] +# +# Example (one week window): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata +# +# EXPECTED OUTPUT: +# Location: laravel_app/storage/app/angata/weekly_mosaic/ +# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif +# Files created: #fields (e.g., 1185 for Angata) +# Storage: ~50-100 MB total for all mosaic TIFFs +# Script execution time: 5-10 minutes +# +# NOTE: Files are named with ISO week number (WW) and year (YYYY) +# Week calculation is automatic based on END_DATE +# +# ============================================================================ + + +# ============================================================================== +# STEP 7: R80 - Calculate Key Performance Indicators (KPIs) +# ============================================================================== +# +# PURPOSE: +# Calculate per-field metrics from weekly mosaic TIFFs: +# - Field uniformity (CV - Coefficient of Variation) +# - Growth trends (4-week and 8-week) +# - Area change detection +# - TCH forecast +# - Spatial clustering (weed/stress detection) +# - Generates Excel export for dashboards and reporting +# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif +# - Field boundaries (pivot.geojson) +# - Harvest data (harvest.xlsx) +# - Historical 
stats cache (RDS from previous weeks) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats) +# - 21 columns with field-level KPIs and alerts +# +# PARAMETERS: +# END_DATE: Report date in YYYY-MM-DD format (default: today) +# PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata) +# OFFSET: Days to look back for historical comparison (default: 7, for backward compatibility) +# +# COMMAND #1 - Current Date & Default Project (Auto-detects TODAY): +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R +# +# COMMAND #2 - Specific Date & Project: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET] +# +# Example (2026-02-09, angata, 7-day lookback): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7 +# +# EXPECTED OUTPUT: +# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# Rows: One per field (e.g., 1185 for Angata) +# Columns: 21 KPI columns (uniformity, trend, alerts, etc.) +# Location: laravel_app/storage/app/angata/output/ +# Script execution time: 10-20 minutes +# +# EXPECTED COLUMNS: +# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac, +# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence, +# spatial_cluster, alert_urgency, alert_type, alert_message, etc. +# +# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40: +# R40 order: [END_DATE] [OFFSET] [PROJECT] +# R80 order: [END_DATE] [PROJECT] [OFFSET] +# These are NOT the same! Ensure correct order for each script. 
+# +# ============================================================================ + + +# ============================================================================== +# STEP 8: R90/R91 - Generate Word Report (OPTIONAL) +# ============================================================================== +# +# PURPOSE: +# Generate formatted Word report (.docx) with: +# - KPI summary tables and charts +# - Per-field performance metrics +# - Alerts and recommendations +# - Interpretation guides +# +# Client-Specific Reports: +# - R90: Agronomic Support (for AURA project) +# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA) +# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# (from Script 80) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_*.docx +# - Formatted Word document (~5-10 MB) +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) +# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render) +# +# COMMAND #1 - AGRONOMIC REPORT (AURA project): +# From R console or R script: +# +# rmarkdown::render( +# "r_app/90_CI_report_with_kpis_simple.Rmd", +# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")), +# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx", +# output_dir = "laravel_app/storage/app/angata/reports" +# ) +# +# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA): +# From R console or R script: +# +# rmarkdown::render( +# "r_app/91_CI_report_with_kpis_Angata.Rmd", +# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")), +# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx", +# output_dir = "laravel_app/storage/app/angata/reports" +# ) +# +# EXPECTED OUTPUT: +# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx +# Location: laravel_app/storage/app/{PROJECT}/reports/ +# Script execution time: 5-10 minutes +# +# 
NOTE: +# These are R Markdown files and cannot be run directly via Rscript +# Use rmarkdown::render() from an R interactive session or wrapper script +# See run_full_pipeline.R for an automated example +# +# ============================================================================ + + +# ============================================================================== +# QUICK REFERENCE: Common Workflows +# ============================================================================== +# +# WORKFLOW A: Weekly Update (Most Common) +# ───────────────────────────────────────────────────────────────────────── +# Goal: Process latest week of data through full pipeline +# +# Parameters: +# $PROJECT = "angata" +# $END_DATE = "2026-02-04" # Today or latest date available +# $OFFSET = 7 # One week back +# +# Steps: +# 1. SKIP Python download (if you already have data) +# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7 +# (Argument order: [PROJECT] [END_DATE] [OFFSET]) +# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 +# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata +# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata +# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata +# (Argument order: [END_DATE] [OFFSET] [PROJECT]) +# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7 +# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!) +# 8. 
OPTIONAL R91 (Cane Supply) - Use automated runner: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R +# OR from R console: +# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd", +# params=list(data_dir="angata", report_date=as.Date("2026-02-04")), +# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx", +# output_dir="laravel_app/storage/app/angata/reports") +# +# Execution time: ~60-90 minutes total +# +# +# WORKFLOW B: Initial Setup (Large Backfill) +# ───────────────────────────────────────────────────────────────────────── +# Goal: Process multiple weeks of historical data +# +# Steps: +# 1. Python download (your entire date range) +# 2. Run R10 with large offset to process all historical dates: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365 +# (This processes from 2025-02-04 to 2026-02-04, covering entire year) +# 3. Run R20 with large offset to process all historical dates: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365 +# (This processes from 2025-02-04 to 2026-02-04, covering entire year) +# 4. Run R30 once (growth model full season) +# 5. Run R21 once (CSV export) +# 6. Run R40 with specific week windows as needed +# 7. Run R80 for each week you want KPIs for + +# 6. 
For each week, run: +# - R40 with different END_DATE values (one per week) +# - R80 with different WEEK/YEAR values (one per week) +# - R91 optional (one per week report) +# +# Pro tip: Script R40 with offset=14 covers two weeks at once +# Then R40 again with offset=7 for just one week +# +# +# WORKFLOW C: Troubleshooting (Check Intermediate Outputs) +# ───────────────────────────────────────────────────────────────────────── +# Goal: Verify outputs before moving to next step +# +# After R10: Check field_tiles/{FIELD_ID}/ has #dates files +# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files +# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds +# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week +# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx +# +# ============================================================================ + +# ============================================================================== +# TROUBLESHOOTING +# ============================================================================== +# +# ISSUE: R20 not processing all field_tiles files +# ──────────────────────────────────────────────── +# Symptom: field_tiles has 496 files, field_tiles_CI only has 5 +# +# Possible causes: +# 1. Source files incomplete or corrupted +# 2. Script 20 skips because CI TIFF already exists (even if incomplete) +# 3. Partial run from previous attempt +# +# Solutions: +# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!) +# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/* +# 2. Re-run Script 20 +# 3. If still fails, delete field_tiles_CI completely and re-run Script 20 +# rm -r laravel_app/storage/app/angata/field_tiles_CI/ +# +# ISSUE: Script 80 says "No per-field mosaic files found" +# ──────────────────────────────────────────────────────── +# Symptom: R80 fails to calculate KPIs +# +# Possible causes: +# 1. 
Script 40 hasn't run yet (weekly_mosaic doesn't exist) +# 2. Wrong END_DATE or WEEK/YEAR combination +# 3. weekly_mosaic/{FIELD}/ directory missing (old format?) +# +# Solutions: +# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif +# 2. Verify END_DATE is within date range of available CI data +# 3. For current week: End date must be THIS week (same ISO week as today) +# +# ISSUE: Python download fails ("Not authorized") +# ──────────────────────────────────────────────── +# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error +# +# Cause: PLANET_API_KEY environment variable not set +# +# Solution: +# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here" +# 2. Verify: $env:PLANET_API_KEY (should show your key) +# 3. Try download again +# +# ISSUE: R30 takes too long +# ────────────────────────── +# Symptom: Script 30 running for >30 minutes +# +# Cause: LOESS interpolation is slow with many dates/fields +# +# Solution: +# 1. This is normal - large date ranges slow down interpolation +# 2. Subsequent runs are faster (cached results) +# 3. 
If needed: reduce offset or run fewer weeks at a time
+#
+# ==============================================================================
+
+# ==============================================================================
+# SUMMARY OF FILES CREATED BY EACH SCRIPT
+# ==============================================================================
+#
+# Script 10 creates:
+# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif
+#
+# Script 20 creates:
+# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif
+# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds
+#
+# Script 30 creates:
+# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
+#
+# Script 21 creates:
+# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
+#
+# Python 31 creates:
+# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
+#
+# Script 40 creates:
+# laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/week_{WW}_{YYYY}.tif
+#
+# Script 80 creates:
+# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
+# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds
+#
+# Script 90/91 creates:
+# laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*_{PROJECT}_{DATE}.docx
+#
+# ==============================================================================
+
diff --git a/r_app/_SCRIPT_HEADER_TEMPLATE.R b/r_app/_SCRIPT_HEADER_TEMPLATE.R
new file mode 100644
index 0000000..83dac6f
--- /dev/null
+++ b/r_app/_SCRIPT_HEADER_TEMPLATE.R
@@ -0,0 +1,54 @@
+# ============================================================================
+# SCRIPT XX: [DESCRIPTIVE TITLE]
+# ============================================================================
+# PURPOSE:
+# [What this script does in 2-3 sentences.
Example: Downloads satellite +# imagery from Planet API, processes 4-band RGB+NIR data, and saves +# merged GeoTIFFs for use by downstream analysis stages.] +# +# INPUT DATA: +# - Source: [Which directory/files it reads. Example: Planet API, hardcoded bbox] +# - Format: [TIFF, RDS, GeoJSON, etc. Example: 4-band uint16 GeoTIFF] +# - Location: [Full path example] +# +# OUTPUT DATA: +# - Destination: [Which directory/files it creates. Example: laravel_app/storage/app/{project}/merged_tif/] +# - Format: [TIFF, RDS, CSV, etc. Example: Single-band float32 GeoTIFF] +# - Naming convention: [Example: {YYYY-MM-DD}.tif] +# +# USAGE: +# Rscript XX_script_name.R [arg1] [arg2] [arg3] +# +# Example (Windows PowerShell): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/XX_script_name.R angata 2026-01-15 +# +# PARAMETERS: +# - arg1: [Description, type, and valid values. Example: project (character) - angata, chemba, xinavane, esa, simba] +# - arg2: [Description, type, and valid values. Example: date (character, optional) - ISO format YYYY-MM-DD; default today] +# +# CLIENT TYPES: +# - cane_supply (ANGATA): [Yes/No - if Yes, briefly explain any differences] +# - agronomic_support (AURA): [Yes/No - if Yes, briefly explain any differences] +# +# DEPENDENCIES: +# - Packages: [dplyr, tidyr, terra, sf, readr, writexl, etc.] +# - Utils files: [parameters_project.R, 00_common_utils.R, XX_utils.R, etc.] +# - External data: [harvest.xlsx, pivot.geojson, etc.] +# - Data directories: [laravel_app/storage/app/{project}/] +# +# NOTES: +# [Any special considerations, assumptions, or future improvements. +# Example: Cloud filtering uses CI >= 0.5 threshold. Multi-field support +# implemented via field geometry masking from pivot.geojson.] 
+#
+# RELATED ISSUES:
+# SC-XXX: [Brief description of related work]
+#
+# HISTORY:
+# 2026-02-03: [Description of change, if refactored or enhanced]
+# ============================================================================
+
+# NOTE: This is a TEMPLATE file for documentation purposes only.
+# When creating a new script or updating an existing one, copy this template
+# and fill in all sections with accurate information about your specific script.
+# Then delete this comment block.
diff --git a/r_app/experiments/ci_extraction_and_yield_prediction.R b/r_app/experiments/ci_extraction_and_yield_prediction.R
index 83f233b..3334498 100644
--- a/r_app/experiments/ci_extraction_and_yield_prediction.R
+++ b/r_app/experiments/ci_extraction_and_yield_prediction.R
@@ -156,9 +156,11 @@ calculate_ci <- function(raster_obj) {
 red_band <- raster_obj[[3]]
 nir_band <- raster_obj[[4]]
- # CI formula: (NIR / Red) - 1
- # This highlights chlorophyll content in vegetation
- ci_raster <- (nir_band / red_band) - 1
+ # CI formula: (NIR / Green) - 1, NOT (NIR / Red) - 1
+ # *** CRITICAL: Use GREEN band for Chlorophyll Index, NOT RED ***
+ # GREEN band is essential for proper chlorophyll-sensitive calculation
+ green_band <- raster_obj[[2]] # band 2, given red is band 3 (B,G,R,NIR order) - TODO confirm against source imagery
+ ci_raster <- (nir_band / green_band) - 1
# Filter extreme values that may result from division operations
ci_raster[ci_raster > 10] <- 10 # Cap max value
diff --git a/r_app/experiments/crop_messaging/young_field_analysis.R b/r_app/experiments/crop_messaging/young_field_analysis.R
index e3ef279..5319a37 100644
--- a/r_app/experiments/crop_messaging/young_field_analysis.R
+++ b/r_app/experiments/crop_messaging/young_field_analysis.R
@@ -68,8 +68,9 @@ calculate_enhanced_indices <- function(red, green, blue, nir) {
 grvi <- green / red
 names(grvi) <- "GRVI"
- # 6. Chlorophyll Index (current CI - for comparison)
- ci <- nir / red - 1
+ # 6.
Chlorophyll Index (CI = NIR / Green - 1, NOT NIR/Red) + # *** CRITICAL: Correct formula uses GREEN band, not RED *** + ci <- nir / green - 1 names(ci) <- "CI" return(list( diff --git a/r_app/kpi_utils.R b/r_app/old_scripts/kpi_utils.R similarity index 90% rename from r_app/kpi_utils.R rename to r_app/old_scripts/kpi_utils.R index 6dc31af..b960d1f 100644 --- a/r_app/kpi_utils.R +++ b/r_app/old_scripts/kpi_utils.R @@ -69,31 +69,94 @@ calculate_week_numbers <- function(report_date = Sys.Date()) { #' @param year Year #' @param mosaic_dir Directory containing weekly mosaics #' @return Terra raster with CI band, or NULL if file not found +# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures) +load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) { + # Check if this is per-field loading mode + is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field") + + if (is_per_field) { + # Per-field architecture: load this specific field's mosaic + per_field_dir <- attr(ci_raster_or_obj, "per_field_dir") + week_file <- attr(ci_raster_or_obj, "week_file") + field_mosaic_path <- file.path(per_field_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_mosaic <- terra::rast(field_mosaic_path) + # Extract CI band (5th band) if multi-band, otherwise use as-is + if (terra::nlyr(field_mosaic) >= 5) { + return(field_mosaic[[5]]) + } else { + return(field_mosaic[[1]]) + } + }, error = function(e) { + safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING") + return(NULL) + }) + } else { + safe_log(paste("Per-field mosaic not found for", field_name), "WARNING") + return(NULL) + } + } else { + # Single-file architecture: crop from loaded raster + if (!is.null(field_vect)) { + return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE)) + } else { + return(ci_raster_or_obj) + } + } +} + 
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { week_file <- sprintf("week_%02d_%d.tif", week_num, year) week_path <- file.path(mosaic_dir, week_file) - if (!file.exists(week_path)) { - safe_log(paste("Weekly mosaic not found:", week_path), "WARNING") - return(NULL) + # FIRST: Try to load single-file mosaic (legacy approach) + if (file.exists(week_path)) { + tryCatch({ + mosaic_raster <- terra::rast(week_path) + ci_raster <- mosaic_raster[[5]] # CI is the 5th band + names(ci_raster) <- "CI" + safe_log(paste("Loaded weekly mosaic (single-file):", week_file)) + return(ci_raster) + }, error = function(e) { + safe_log(paste("Error loading mosaic:", e$message), "ERROR") + return(NULL) + }) } - tryCatch({ - mosaic_raster <- terra::rast(week_path) - # Extract CI band by name or position - if ("CI" %in% names(mosaic_raster)) { - ci_raster <- mosaic_raster[["CI"]] - } else { - # Fallback: assume last band is CI (after Red, Green, Blue, NIR) - ci_raster <- mosaic_raster[[nlyr(mosaic_raster)]] + # SECOND: Per-field architecture - store mosaic_dir path for later per-field loading + # Don't try to merge - just return the directory path so field-level functions can load per-field + if (dir.exists(mosaic_dir)) { + field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + # Check if any field has this week's mosaic + found_any <- FALSE + for (field in field_dirs) { + field_mosaic_path <- file.path(mosaic_dir, field, week_file) + if (file.exists(field_mosaic_path)) { + found_any <- TRUE + break + } } - names(ci_raster) <- "CI" - safe_log(paste("Loaded weekly mosaic:", week_file)) - return(ci_raster) - }, error = function(e) { - safe_log(paste("Error loading mosaic:", e$message), "ERROR") - return(NULL) - }) + + if (found_any) { + safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year, + "- will load per-field on demand")) + # Return a special object that indicates per-field 
loading is needed + # Store the mosaic_dir path in the raster's metadata + dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA) + attr(dummy_raster, "per_field_dir") <- mosaic_dir + attr(dummy_raster, "week_file") <- week_file + attr(dummy_raster, "is_per_field") <- TRUE + return(dummy_raster) + } + } + + # If we get here, no mosaic found + safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING") + return(NULL) } # Function to prepare predictions with consistent naming and formatting @@ -141,12 +204,16 @@ calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) { # Extract field boundary field_vect <- field_boundaries_vect[i] - # crop ci_raster with field_vect and use that for ci_values - cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE) + # Load appropriate CI raster using helper function + cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect) # Extract CI values for this field using helper function - field_values <- extract_ci_values(cropped_raster, field_vect) - valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + if (!is.null(cropped_raster)) { + field_values <- extract_ci_values(cropped_raster, field_vect) + valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + } else { + valid_values <- c() + } # If all valid values are 0 (cloud), fill with NA row if (length(valid_values) == 0 || all(valid_values == 0)) { @@ -286,9 +353,18 @@ calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) # Extract field boundary field_vect <- field_boundaries_vect[i] - # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) - previous_values <- extract_ci_values(previous_ci, field_vect) + # Load appropriate CI rasters using helper function + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- 
load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else { + current_values <- c() + previous_values <- c() + } # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & @@ -569,8 +645,18 @@ calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundari sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] - # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) + # Load appropriate CI rasters using helper function + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else { + current_values <- c() + previous_values <- c() + } previous_values <- extract_ci_values(previous_ci, field_vect) # Clean values @@ -728,8 +814,17 @@ calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundarie } # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) - previous_values <- extract_ci_values(previous_ci, field_vect) + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, 
field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else { + current_values <- c() + previous_values <- c() + } # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & @@ -810,8 +905,15 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] + # Load appropriate CI raster using helper function + field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect) + # Extract CI values using helper function - ci_values <- extract_ci_values(ci_raster, field_vect) + if (!is.null(field_ci)) { + ci_values <- extract_ci_values(field_ci, field_vect) + } else { + ci_values <- c() + } valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] if (length(valid_values) > 1) { diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 5890a94..5e6b2e9 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -1,288 +1,362 @@ -# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R +# ============================================================================== +# PARAMETERS_PROJECT_2.R (CLEANED VERSION) +# ============================================================================== +# PURPOSE: +# Project configuration, directory structure setup, and helper functions +# for centralized path management and project initialization. # -# PARAMETERS_PROJECT.R -# ==================== -# This script defines project parameters, directory structures, and loads field boundaries. -# It establishes all the necessary paths and creates required directories for the SmartCane project. 
+# SECTION 1: Libraries & Dependencies +# SECTION 2: Client Type Mapping & Configuration +# SECTION 3: Directory Structure Setup +# SECTION 4: Date/Week Utility Functions (centralized) +# SECTION 5: Field Boundary & Harvest Data Loaders +# SECTION 6: Project Initialization & Logging +# SECTION 7: Mosaic & KPI Verification Helpers +# +# NOTE: Duplicate functions (safe_log, smartcane_debug, smartcane_warn, +# load_field_boundaries, date_list, repair_geojson_geometries) +# have been REMOVED - they belong in 00_common_utils.R. +# Source 00_common_utils.R after parameters_project.R to get those. +# ============================================================================== -# 1. Load required libraries -# ------------------------- +# ============================================================================== +# SECTION 1: LIBRARIES & DEPENDENCIES +# ============================================================================== suppressPackageStartupMessages({ library(here) library(readxl) library(sf) library(dplyr) library(tidyr) + library(lubridate) library(jsonlite) # For reading tiling_config.json }) -# 2. 
Smart detection for tile-based vs single-file mosaic approach -# ---------------------------------------------------------------- -detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { - # PRIORITY 1: Check for tiling_config.json metadata file from script 10 - # This is the most reliable source since script 10 explicitly records its decision - - if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) { - # Try to find tiling_config.json in any grid-size subfolder - config_files <- list.files(daily_tiles_split_dir, - pattern = "tiling_config\\.json$", - recursive = TRUE, - full.names = TRUE) - - if (length(config_files) > 0) { - # Found a config file - use the most recent one - config_file <- config_files[which.max(file.info(config_files)$mtime)] - - tryCatch({ - config_json <- jsonlite::read_json(config_file) - return(list( - has_tiles = config_json$has_tiles %||% TRUE, - detected_tiles = character(), - total_files = 0, - source = "tiling_config.json", - grid_size = config_json$grid_size %||% "unknown" - )) - }, error = function(e) { - warning("Error reading tiling_config.json: ", e$message) - # Fall through to file-based detection - }) - } +# ============================================================================== +# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION +# ============================================================================== +# Maps project names to client types for pipeline control +# This determines which scripts run and what outputs they produce + +CLIENT_TYPE_MAP <- list( + "angata" = "cane_supply", + "aura" = "agronomic_support", + "chemba" = "cane_supply", + "xinavane" = "cane_supply", + "esa" = "cane_supply" +) + +#' Get client type for a project +#' @param project_name Character project name +#' @return Character client type ("cane_supply" or "agronomic_support") +get_client_type <- function(project_name) { + client_type <- CLIENT_TYPE_MAP[[project_name]] + if (is.null(client_type)) 
{ + warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name)) + return("cane_supply") } - - # PRIORITY 2: File-based detection (fallback if metadata not found) - # Check if merged_final_tif/ contains tile-named files OR grid-size subdirectories - - if (!dir.exists(merged_final_tif_dir)) { - return(list( - has_tiles = FALSE, - detected_tiles = character(), - total_files = 0, - source = "directory_not_found" - )) - } - - # First check if there are grid-size subdirectories (5x5, 10x10, etc.) - # This indicates the tiles are organized: merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif - grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) - - if (length(grid_patterns) > 0) { - # Found grid-size subdirectories - tiles exist! - grid_size <- grid_patterns[1] - grid_dir <- file.path(merged_final_tif_dir, grid_size) - - # List sample tile files from the grid directory - sample_tiles <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)[1:3] - - return(list( - has_tiles = TRUE, - detected_tiles = sample_tiles, - total_files = length(sample_tiles), - source = "grid_subdirectory_detection", - grid_size = grid_size, - grid_path = grid_dir - )) - } - - # Fall back to checking for tile-named files directly in merged_final_tif - # List all .tif files in merged_final_tif - tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE) - - if (length(tif_files) == 0) { - return(list( - has_tiles = FALSE, - detected_tiles = character(), - total_files = 0, - source = "no_files_found" - )) - } - - # Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits) - # Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif - tile_pattern <- "_(\\d{2})\\.tif$" - tile_files <- tif_files[grepl(tile_pattern, tif_files)] - - has_tiles <- length(tile_files) > 0 - - 
return(list( - has_tiles = has_tiles, - detected_tiles = tile_files, - total_files = length(tif_files), - source = "file_pattern_detection" - )) + return(client_type) } -# 2. Define project directory structure -# ----------------------------------- -setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") { - # Base directories - laravel_storage_dir <- here("laravel_app/storage/app", project_dir) +# Client-specific KPI configurations +# Defines which KPIs and outputs are required for each client type +CLIENT_TYPE_CONFIGS <- list( - # Determine which TIF source folder to use based on data_source parameter - # Default is merged_tif_8b for newer data with cloud masking (8-band + UDM) - # Alternative: merged_tif for 4-band legacy data - merged_tif_folder <- here(laravel_storage_dir, data_source) + # Aura (agronomic_support): Farm-level KPI summaries for agronomists + "agronomic_support" = list( + client_type = "agronomic_support", + description = "Farm-level KPI summaries for agronomic decision support", + kpi_calculations = c( + "field_uniformity", + "area_change", + "tch_forecasted", + "growth_decline", + "weed_presence", + "gap_filling" + ), + outputs = c("kpi_summary_tables", "field_details"), + requires_harvest_data = FALSE, + script_90_compatible = TRUE, + script_91_compatible = FALSE + ), - # Detect tile mode based on metadata from script 10 or file patterns - merged_final_dir <- here(laravel_storage_dir, "merged_final_tif") + # Cane Supply (cane_supply): Per-field analysis with harvest prediction + "cane_supply" = list( + client_type = "cane_supply", + description = "Per-field analysis with harvest prediction and phase assignment", + kpi_calculations = c( + "per_field_analysis", + "phase_assignment", + "harvest_prediction", + "status_triggers" + ), + outputs = c("field_analysis_excel", "field_analysis_summary"), + requires_harvest_data = TRUE, + script_90_compatible = FALSE, + script_91_compatible = TRUE + ) +) + +#' Get KPI 
configuration for a specific client type +#' @param client_type Character (e.g., "agronomic_support", "cane_supply") +#' @return List containing configuration for that client type +get_client_kpi_config <- function(client_type) { + config <- CLIENT_TYPE_CONFIGS[[client_type]] + if (is.null(config)) { + warning(sprintf("Client type '%s' not found - using cane_supply defaults", client_type)) + return(CLIENT_TYPE_CONFIGS[["cane_supply"]]) + } + return(config) +} + +# ============================================================================== +# SECTION 3: DIRECTORY STRUCTURE SETUP +# ============================================================================== +# CENTRALIZED PATH MANAGEMENT: All file paths in the entire pipeline +# are derived from setup_project_directories(). +# This is the single source of truth for 8 tiers of directories. + +#' Setup complete project directory structure +#' +#' Creates all 8 tiers of directories and returns a comprehensive list +#' of paths for use throughout the pipeline. +#' +#' @param project_dir Character. Project name (e.g., "angata", "aura") +#' @param data_source Character. 
"merged_tif" (default) or "merged_tif_8b" +#' @return List containing all 8 tiers of paths +#' +#' @details +#' TIER 1: Raw data (merged_tif) - Python download output +#' TIER 2: Per-field TIFFs - Script 10 output +#' TIER 3: CI extraction - Script 20 output +#' TIER 4: Growth model - Script 30 output +#' TIER 5: Mosaics - Script 40 output +#' TIER 6: KPI & reports - Scripts 80/90/91 output +#' TIER 7: Support files - GeoJSON, Excel, logs +#' TIER 8: Metadata - Config, CRS info +setup_project_directories <- function(project_dir, data_source = "merged_tif") { + # BASE DIRECTORIES + laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir) + + # TIER 1: RAW DATA (Script 00 output - Python download) + merged_tif_folder <- here(laravel_storage_dir, "merged_tif") + + # TIER 2: PER-FIELD TIFFS (Script 10 output) + field_tiles_dir <- here(laravel_storage_dir, "field_tiles") + field_tiles_ci_dir <- here(laravel_storage_dir, "field_tiles_CI") daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split") - tile_detection <- detect_mosaic_mode( - merged_final_tif_dir = merged_final_dir, - daily_tiles_split_dir = daily_tiles_split_dir - ) - use_tile_mosaic <- tile_detection$has_tiles + # SUPPORT TIER: DATA DIRECTORY (define early for use in later tiers) + data_dir <- here(laravel_storage_dir, "Data") - # Main subdirectories - dirs <- list( - reports = here(laravel_storage_dir, "reports"), - logs = here(laravel_storage_dir, "logs"), - data = here(laravel_storage_dir, "Data"), - tif = list( - merged = merged_tif_folder, # Use data_source parameter to select folder - final = merged_final_dir - ), - weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"), - weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"), - extracted_ci = list( - base = here(laravel_storage_dir, "Data/extracted_ci"), - daily = here(laravel_storage_dir, "Data/extracted_ci/daily_vals"), - cumulative = here(laravel_storage_dir, "Data/extracted_ci/cumulative_vals") - ), - 
vrt = here(laravel_storage_dir, "Data/vrt"), - harvest = here(laravel_storage_dir, "Data/HarvestData") - ) + # TIER 3: CI EXTRACTION (Script 20 output) + # Structure: Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds (per-field daily CI values) + extracted_ci_base_dir <- here(data_dir, "extracted_ci") + daily_ci_vals_dir <- here(extracted_ci_base_dir, "daily_vals") # Per-field structure + cumulative_ci_vals_dir <- here(extracted_ci_base_dir, "cumulative_vals") + ci_for_python_dir <- here(extracted_ci_base_dir, "ci_data_for_python") + + # TIER 4: GROWTH MODEL (Script 30 output) + growth_model_interpolated_dir <- here(data_dir, "growth_model_interpolated") + + # TIER 5: MOSAICS (Script 40 output) + weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic") + weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max") + + # TIER 6: KPI & REPORTING (Scripts 80/90/91 output) + reports_dir <- here(laravel_storage_dir, "reports") + kpi_reports_dir <- here(reports_dir, "kpis", "field_level") + kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats") + kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis") + + # TIER 7: SUPPORT (various scripts) + vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction + harvest_dir <- here(data_dir, "harvest") # Harvest data directory + log_dir <- here(laravel_storage_dir, "logs") # Create all directories - for (dir_path in unlist(dirs)) { + all_dirs <- c( + merged_tif_folder, field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir, + extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir, + growth_model_interpolated_dir, + weekly_mosaic_dir, weekly_tile_max_dir, + reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir, + data_dir, vrt_dir, harvest_dir, log_dir + ) + + for (dir_path in all_dirs) { dir.create(dir_path, showWarnings = FALSE, recursive = TRUE) } - # Return directory structure for use in other functions + # TIER 8: 
CONFIG & METADATA PATHS + field_boundaries_path <- here(data_dir, "pivot.geojson") + tiling_config_path <- here(laravel_storage_dir, "tiling_config.json") + + # Return comprehensive list return(list( + # PROJECT ROOT laravel_storage_dir = laravel_storage_dir, - reports_dir = dirs$reports, - log_dir = dirs$logs, - data_dir = dirs$data, - planet_tif_folder = dirs$tif$merged, - merged_final = dirs$tif$final, - daily_CI_vals_dir = dirs$extracted_ci$daily, - cumulative_CI_vals_dir = dirs$extracted_ci$cumulative, - weekly_CI_mosaic = if (use_tile_mosaic) dirs$weekly_tile_max else dirs$weekly_mosaic, # SMART: Route based on tile detection - daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction - weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output) - use_tile_mosaic = use_tile_mosaic, # Flag indicating if tiles are used for this project - tile_detection_info = list( - has_tiles = tile_detection$has_tiles, - detected_source = tile_detection$source, - detected_count = tile_detection$total_files, - grid_size = tile_detection$grid_size %||% "unknown", - sample_tiles = head(tile_detection$detected_tiles, 3) - ), - harvest_dir = dirs$harvest, - extracted_CI_dir = dirs$extracted_ci$base + + # TIER 1: Raw data + merged_tif_folder = merged_tif_folder, + + # TIER 2: Per-field TIFFs + field_tiles_dir = field_tiles_dir, + field_tiles_ci_dir = field_tiles_ci_dir, + daily_tiles_split_dir = daily_tiles_split_dir, + + # TIER 3: CI extraction + extracted_ci_base_dir = extracted_ci_base_dir, + daily_ci_vals_dir = daily_ci_vals_dir, + cumulative_ci_vals_dir = cumulative_ci_vals_dir, + ci_for_python_dir = ci_for_python_dir, + + # TIER 4: Growth model + growth_model_interpolated_dir = growth_model_interpolated_dir, + + # TIER 5: Mosaics + weekly_mosaic_dir = weekly_mosaic_dir, + weekly_tile_max_dir = weekly_tile_max_dir, + + # TIER 6: KPI & reporting + reports_dir = reports_dir, + kpi_reports_dir = kpi_reports_dir, + 
kpi_field_stats_dir = kpi_field_stats_dir, + kpi_field_analysis_dir = kpi_field_analysis_dir, + + # TIER 7: Support + data_dir = data_dir, + vrt_dir = vrt_dir, + harvest_dir = harvest_dir, + log_dir = log_dir, + + # TIER 8: Metadata + field_boundaries_path = field_boundaries_path, + tiling_config_path = tiling_config_path )) } -#set working dir. -# 3. Load field boundaries -# ---------------------- +# ============================================================================== +# SECTION 4: DATE/WEEK UTILITY FUNCTIONS +# ============================================================================== +# ISO 8601 week/year functions for consistent date handling across scripts + +#' Extract ISO week number from a date +#' @param date Date object or string convertible to Date +#' @return Numeric ISO week number (1-53) +get_iso_week <- function(date) { + as.numeric(format(date, "%V")) +} + +#' Extract ISO year from a date +#' @param date Date object or string convertible to Date +#' @return Numeric ISO year +get_iso_year <- function(date) { + as.numeric(format(date, "%G")) +} + +#' Extract both ISO week and year as a list +#' @param date Date object or string convertible to Date +#' @return List with elements: week (1-53), year +get_iso_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) + ) +} + +#' Format date as a week/year label +#' @param date Date object or string convertible to Date +#' @param separator Character. 
Separator between week and year (default "_") +#' @return Character in format "week##_YYYY" (e.g., "week03_2025") +format_week_label <- function(date, separator = "_") { + wwy <- get_iso_week_year(date) + sprintf("week%02d%s%d", wwy$week, separator, wwy$year) +} + +# ============================================================================== +# SECTION 5: FIELD BOUNDARY & HARVEST DATA LOADERS +# ============================================================================== +# IMPORTANT: These functions are also defined in 00_common_utils.R +# to avoid duplication. Source 00_common_utils.R AFTER parameters_project.R +# to override these stub definitions with the full implementations. + +#' Load Field Boundaries from GeoJSON +#' +#' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson). +#' Handles CRS validation and column standardization. +#' +#' @param data_dir Directory containing GeoJSON file +#' @return List with elements: +#' - field_boundaries_sf: sf (Simple Features) object +#' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback) +#' load_field_boundaries <- function(data_dir) { - # Choose field boundaries file based on project and script type - # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model) - # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson - use_pivot_2 <- exists("project_dir") && project_dir == "esa" && - exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03 - - if (use_pivot_2) { - field_boundaries_path <- here(data_dir, "pivot_2.geojson") - } else { - field_boundaries_path <- here(data_dir, "pivot.geojson") - } - + field_boundaries_path <- file.path(data_dir, "pivot.geojson") + if (!file.exists(field_boundaries_path)) { - stop(paste("Field boundaries file not found at path:", field_boundaries_path)) + stop("Field boundaries file not found at:", field_boundaries_path) } tryCatch({ - # Read 
GeoJSON with explicit CRS handling - field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE) + boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE) - # Remove OBJECTID column immediately if it exists - if ("OBJECTID" %in% names(field_boundaries_sf)) { - field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID) + # Filter out features with empty geometries + boundaries_sf <- boundaries_sf[!st_is_empty(boundaries_sf), ] + + # Repair geometries if needed + if (!all(sf::st_is_valid(boundaries_sf))) { + boundaries_sf <- sf::st_make_valid(boundaries_sf) } - # Validate and fix CRS if needed - DO NOT call is.na on CRS objects as it can cause errors - # Just ensure CRS is set; terra will handle projection if needed - tryCatch({ - # Simply assign WGS84 if not already set (safe approach) - # This avoids any problematic is.na() calls on complex CRS objects - if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) { - st_crs(field_boundaries_sf) <- 4326 - warning("CRS was missing, assigned WGS84 (EPSG:4326)") - } - }, error = function(e) { - # If any CRS operation fails, just try to set it - tryCatch({ - st_crs(field_boundaries_sf) <<- 4326 - }, error = function(e2) { - # Silently continue - terra might handle it - warning(paste("Could not set CRS:", e2$message)) - }) - }) - - # Handle column names - accommodate optional sub_area column - # IMPORTANT: Must preserve geometry column properly when renaming sf object - if ("sub_area" %in% names(field_boundaries_sf)) { - # Reorder columns but keep geometry last - field_boundaries_sf <- field_boundaries_sf %>% - dplyr::select(field, sub_field, sub_area) %>% - sf::st_set_geometry("geometry") - } else { - # Reorder columns but keep geometry last - field_boundaries_sf <- field_boundaries_sf %>% - dplyr::select(field, sub_field) %>% - sf::st_set_geometry("geometry") - } - - # Convert to terra vector if possible, otherwise use sf - # Some GeoJSON files (like aura with complex MultiPolygons) may have GDAL/terra 
compatibility issues - field_boundaries <- tryCatch({ - field_boundaries_terra <- terra::vect(field_boundaries_sf) - - # Ensure terra object has valid CRS with safer checks - crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL) - crs_str <- if (!is.null(crs_value)) as.character(crs_value) else "" - - if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) { - terra::crs(field_boundaries_terra) <- "EPSG:4326" - warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)") - } - field_boundaries_terra - - }, error = function(e) { - warning(paste("Terra conversion failed, using sf object instead:", e$message)) - # Return sf object as fallback - functions will handle both types - field_boundaries_sf - }) + # Convert to terra SpatVect + boundaries_spat <- terra::vect(boundaries_sf) return(list( - field_boundaries_sf = field_boundaries_sf, - field_boundaries = field_boundaries + field_boundaries_sf = boundaries_sf, + field_boundaries = boundaries_spat )) }, error = function(e) { - cat("[DEBUG] Error in load_field_boundaries:\n") - cat(" Message:", e$message, "\n") - cat(" Call:", deparse(e$call), "\n") - stop(paste("Error loading field boundaries:", e$message)) + stop("Error loading field boundaries:", e$message) }) } -# 4. 
Load harvesting data -# --------------------- +#' Create Date List +#' +#' Creates a sequence of dates from end_date going back offset days +#' +#' @param end_date End date (Date object) +#' @param offset Number of days to go back +#' @return Character vector of dates in YYYY-MM-DD format +date_list <- function(end_date, offset) { + start_date <- end_date - offset + date_seq <- seq(start_date, end_date, by = "day") + format(date_seq, "%Y-%m-%d") +} + +#' Repair GeoJSON Geometries +#' +#' Validates and repairs invalid geometries in sf object +#' +#' @param sf_object sf object with potentially invalid geometries +#' @return sf object with repaired geometries +repair_geojson_geometries <- function(sf_object) { + if (!all(sf::st_is_valid(sf_object))) { + sf_object <- sf::st_make_valid(sf_object) + } + sf_object +} + +#' Load harvesting data from Excel +#' +#' Loads crop harvest schedule from harvest.xlsx file. +#' Handles flexible date formats (numeric, YYYY-MM-DD, DD/MM/YYYY, etc.). +#' +#' @param data_dir Directory containing harvest.xlsx file +#' @return Data frame with columns: field, sub_field, year, season_start, +#' season_end, age (weeks), sub_area, tonnage_ha. Returns NULL if not found. 
load_harvesting_data <- function(data_dir) { - harvest_file <- here(data_dir, "harvest.xlsx") + harvest_file <- file.path(data_dir, "harvest.xlsx") if (!file.exists(harvest_file)) { warning(paste("Harvest data file not found at path:", harvest_file)) @@ -295,15 +369,11 @@ load_harvesting_data <- function(data_dir) { if (inherits(x, "Date")) return(x) if (inherits(x, "POSIXct")) return(as.Date(x)) - # If it's numeric (Excel date serial), convert directly if (is.numeric(x)) { return(as.Date(x, origin = "1899-12-30")) } - # Try character conversion with multiple formats x_char <- as.character(x) - - # Try common formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, YYYY-MM-DD HH:MM:SS formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S") for (fmt in formats) { @@ -311,24 +381,15 @@ load_harvesting_data <- function(data_dir) { if (!is.na(result)) return(result) } - # If all else fails, return NA return(NA) } tryCatch({ harvesting_data <- read_excel(harvest_file) %>% - dplyr::select( - c( - "field", - "sub_field", - "year", - "season_start", - "season_end", - "age", - "sub_area", - "tonnage_ha" - ) - ) %>% + dplyr::select(c( + "field", "sub_field", "year", + "season_start", "season_end", "age", "sub_area", "tonnage_ha" + )) %>% mutate( field = as.character(field), sub_field = as.character(sub_field), @@ -357,41 +418,86 @@ load_harvesting_data <- function(data_dir) { }) } -# 5. Define logging functions globally first -# --------------------------------------- -# Create a simple default log function in case setup_logging hasn't been called yet +# ============================================================================== +# SECTION 6: LOGGING SYSTEM SETUP +# ============================================================================== +# Create default logging functions + +#' Safe Logging Function +#' +#' Generic logging with [LEVEL] prefix. Works standalone without any framework. +#' Consistent with SmartCane logging standard. 
+#' +#' @param message The message to log +#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG" +#' @return NULL (invisible, used for side effects) +#' +safe_log <- function(message, level = "INFO") { + prefix <- sprintf("[%s]", level) + cat(sprintf("%s %s\n", prefix, message)) +} + +#' SmartCane Debug Logging (Conditional) +#' +#' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set. +#' +#' @param message The message to log +#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE) +#' @return NULL (invisible, used for side effects) +#' +smartcane_debug <- function(message, verbose = FALSE) { + if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") { + return(invisible(NULL)) + } + safe_log(message, level = "DEBUG") +} + +#' SmartCane Warning Logging +#' +#' Logs WARN-level messages. Convenience wrapper around safe_log(). +#' +#' @param message The message to log +#' @return NULL (invisible, used for side effects) +#' +smartcane_warn <- function(message) { + safe_log(message, level = "WARN") +} + log_message <- function(message, level = "INFO") { - timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") - formatted_message <- paste0("[", level, "] ", timestamp, " - ", message) - cat(formatted_message, "\n") + prefix <- sprintf("[%s]", level) + cat(sprintf("%s %s\n", prefix, message)) } -log_head <- function(list, level = "INFO") { - log_message(paste(capture.output(str(head(list))), collapse = "\n"), level) +log_head <- function(data, level = "INFO") { + log_message(paste(capture.output(str(head(data))), collapse = "\n"), level) } -# 6. Set up full logging system with file output -# ------------------------------------------- +#' Setup full logging system with file output +#' +#' Creates log directory and returns logging functions that write to both +#' console and log file. +#' +#' @param log_dir Character. 
Directory for log files +#' @return List with log_file path, log_message function, and log_head function setup_logging <- function(log_dir) { - log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log")) + dir.create(log_dir, showWarnings = FALSE, recursive = TRUE) + + log_file <- file.path(log_dir, sprintf( + "smartcane_%s.log", + format(Sys.time(), "%Y%m%d_%H%M%S") + )) - # Create enhanced log functions log_message <- function(message, level = "INFO") { - timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") - formatted_message <- paste0("[", level, "] ", timestamp, " - ", message) + prefix <- sprintf("[%s] [%s]", level, format(Sys.time(), "%Y-%m-%d %H:%M:%S")) + formatted_message <- sprintf("%s %s", prefix, message) + cat(formatted_message, "\n") cat(formatted_message, "\n", file = log_file, append = TRUE) - - # Also print to console for debugging - if (level %in% c("ERROR", "WARNING")) { - cat(formatted_message, "\n") - } } log_head <- function(list, level = "INFO") { log_message(paste(capture.output(str(head(list))), collapse = "\n"), level) } - # Update the global functions with the enhanced versions assign("log_message", log_message, envir = .GlobalEnv) assign("log_head", log_head, envir = .GlobalEnv) @@ -402,62 +508,328 @@ setup_logging <- function(log_dir) { )) } -# 7. Initialize the project -# ---------------------- -# Export project directories and settings -initialize_project <- function(project_dir, data_source = "merged_tif_8b") { - # Set up directory structure, passing data_source to select TIF folder - dirs <- setup_project_directories(project_dir, data_source = data_source) +# ============================================================================== +# SECTION 6B: DATA SOURCE DETECTION +# ============================================================================== + +#' Detect data source for project +#' +#' Returns the data source directory (always "merged_tif" for consistency) +#' +#' @param project_dir Character. 
Project name +#' @return Character. "merged_tif" +detect_data_source <- function(project_dir) { + return("merged_tif") +} + +#' Detect tile structure from merged TIF directory +#' +#' Checks if tiles exist by looking for: +#' 1. tiling_config.json metadata file (most reliable) +#' 2. Grid subdirectories (5x5, 10x10, etc.) +#' 3. Tile-named files (*_XX.tif pattern) +#' +#' @param merged_final_tif_dir Character. Path to merged TIF directory +#' @param daily_tiles_split_dir Character. Optional path to tiles directory +#' @return List with has_tiles (logical), detected_tiles, total_files, source, grid_size +detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { + # PRIORITY 1: Check for tiling_config.json metadata file from script 10 + if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) { + config_files <- list.files(daily_tiles_split_dir, + pattern = "tiling_config\\.json$", + recursive = TRUE, + full.names = TRUE) + + if (length(config_files) > 0) { + config_file <- config_files[which.max(file.info(config_files)$mtime)] + + tryCatch({ + config_json <- jsonlite::read_json(config_file) + return(list( + has_tiles = config_json$has_tiles %||% TRUE, + detected_tiles = character(), + total_files = 0, + source = "tiling_config.json", + grid_size = config_json$grid_size %||% "unknown" + )) + }, error = function(e) { + warning("Error reading tiling_config.json: ", e$message) + }) + } + } - # Set up logging - logging <- setup_logging(dirs$log_dir) + # PRIORITY 2: File-based detection (fallback) + if (!dir.exists(merged_final_tif_dir)) { + return(list( + has_tiles = FALSE, + detected_tiles = character(), + total_files = 0, + source = "directory_not_found" + )) + } - # Load field boundaries - boundaries <- load_field_boundaries(dirs$data_dir) + # Check for grid-size subdirectories (5x5, 10x10, etc.) 
+ grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) - # Load harvesting data - harvesting_data <- load_harvesting_data(dirs$data_dir) + if (length(grid_patterns) > 0) { + grid_size <- grid_patterns[1] + grid_dir <- file.path(merged_final_tif_dir, grid_size) + sample_tiles <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)[1:3] + + return(list( + has_tiles = TRUE, + detected_tiles = sample_tiles, + total_files = length(sample_tiles), + source = "grid_subdirectory_detection", + grid_size = grid_size, + grid_path = grid_dir + )) + } - # Return all initialized components - return(c( - dirs, - list( - logging = logging, - field_boundaries = boundaries$field_boundaries, - field_boundaries_sf = boundaries$field_boundaries_sf, - harvesting_data = harvesting_data - ) + # Check for tile-named files (*_XX.tif pattern) + tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE) + + if (length(tif_files) == 0) { + return(list( + has_tiles = FALSE, + detected_tiles = character(), + total_files = 0, + source = "no_files_found" + )) + } + + tile_pattern <- "_(\\d{2})\\.tif$" + tile_files <- tif_files[grepl(tile_pattern, tif_files)] + has_tiles <- length(tile_files) > 0 + + return(list( + has_tiles = has_tiles, + detected_tiles = tile_files, + total_files = length(tif_files), + source = "file_pattern_detection" )) } -# When script is sourced, initialize with the global project_dir variable if it exists -if (exists("project_dir")) { - # Now we can safely log before initialization - log_message(paste("Initializing project with directory:", project_dir)) +# ============================================================================== +# SECTION 7: MOSAIC & KPI VERIFICATION HELPERS +# ============================================================================== +# Centralized helper functions for run_full_pipeline.R to avoid hardcoding 
paths + +#' Detect mosaic mode from project structure +#' +#' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics +#' +#' @param project_dir Character. Project name +#' @return Character. "tiled" or "single-file" +detect_mosaic_mode <- function(project_dir) { + # Per-field architecture is standard - always return "single-file" + # unless weekly_tile_max directory exists with content + mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") - # Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default - data_src <- if (exists("data_source")) data_source else "merged_tif_8b" - log_message(paste("Using data source directory:", data_src)) - - project_config <- initialize_project(project_dir, data_source = data_src) - - # Expose all variables to the global environment - list2env(project_config, envir = .GlobalEnv) - - # Log project initialization completion with tile mode info - log_message(paste("Project initialized with directory:", project_dir)) - if (exists("use_tile_mosaic")) { - mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE" - log_message(paste("Mosaic mode detected:", mosaic_mode)) - if (exists("tile_detection_info") && !is.null(tile_detection_info)) { - log_message(paste(" - Detection source:", tile_detection_info$detected_source)) - log_message(paste(" - Grid size:", tile_detection_info$grid_size)) - log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count)) - if (length(tile_detection_info$sample_tiles) > 0) { - log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", "))) - } - } + if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) { + return("tiled") } + return("single-file") +} + +#' Detect grid size from tile directory structure +#' +#' For per-field architecture, returns "unknown" (grid-based tiling is legacy) +#' +#' @param project_dir Character. 
Project name +#' @return Character. Grid size ("unknown" for per-field) +detect_grid_size <- function(project_dir) { + # Per-field architecture doesn't use grid-based organization + return("unknown") +} + +#' Get project storage path +#' +#' @param project_dir Character. Project name +#' @param subdir Character. Optional subdirectory (default NULL) +#' @return Character. Full path +get_project_storage_path <- function(project_dir, subdir = NULL) { + path <- file.path("laravel_app", "storage", "app", project_dir) + if (!is.null(subdir)) { + path <- file.path(path, subdir) + } + return(path) +} + +#' Get mosaic directory +#' +#' @param project_dir Character. Project name +#' @param mosaic_mode Character. "tiled" or "single-file" +#' @return Character. Full path to mosaic directory +get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { + if (mosaic_mode == "auto") { + mosaic_mode <- detect_mosaic_mode(project_dir) + } + + if (mosaic_mode == "tiled") { + get_project_storage_path(project_dir, "weekly_tile_max") + } else { + get_project_storage_path(project_dir, "weekly_mosaic") + } +} + +#' Get KPI directory based on client type +#' +#' @param project_dir Character. Project name +#' @param client_type Character. Client type +#' @return Character. Full path to KPI directory +get_kpi_dir <- function(project_dir, client_type) { + base_path <- get_project_storage_path(project_dir, "reports/kpis") + + if (client_type == "agronomic_support") { + return(file.path(base_path, "field_level")) + } else { + return(file.path(base_path, "field_analysis")) + } +} + +#' Get expected output path for harvest imminent file +#' +#' @param project_dir Character. Project name +#' @param week_num Integer. ISO week number +#' @param year_num Integer. Year +#' @return Character. 
Full path to expected harvest imminent CSV file +get_harvest_output_path <- function(project_dir, week_num, year_num) { + file.path( + "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num) + ) +} + +#' Check if harvest output file exists for a specific week +#' +#' @param project_dir Character. Project name +#' @param week_num Integer. ISO week number +#' @param year_num Integer. Year +#' @return Logical. TRUE if file exists +check_harvest_output_exists <- function(project_dir, week_num, year_num) { + path <- get_harvest_output_path(project_dir, week_num, year_num) + file.exists(path) +} + +#' Get mosaic verification directory +#' +#' @param project_dir Character. Project name +#' @param mosaic_mode Character. "tiled" or "single-file" +#' @return Character. Full path to mosaic directory +get_mosaic_verification_dir <- function(project_dir, mosaic_mode) { + base <- file.path("laravel_app", "storage", "app", project_dir) + + if (mosaic_mode == "tiled") { + file.path(base, "weekly_tile_max") + } else { + file.path(base, "weekly_mosaic") + } +} + +#' Check if mosaic files exist for a specific week +#' +#' @param project_dir Character. Project name +#' @param week_num Integer. ISO week number +#' @param year_num Integer. Year +#' @param mosaic_mode Character. 
"tiled" or "single-file" +#' @return List with created (logical), file_count (int), sample_files (char vector) +check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) { + mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode) + + if (!dir.exists(mosaic_dir)) { + return(list(created = FALSE, file_count = 0, sample_files = character())) + } + + week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) + mosaic_files <- list.files(mosaic_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE) + + list( + created = length(mosaic_files) > 0, + file_count = length(mosaic_files), + sample_files = head(mosaic_files, 3) + ) +} + +#' Check KPI completeness for reporting window +#' +#' @param project_dir Character. Project name +#' @param client_type Character. Client type +#' @param end_date Date. End date of reporting window +#' @param reporting_weeks Integer. Number of weeks to check +#' @return List with kpi_dir, kpis_df, missing_count +check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks = 1) { + kpi_dir <- get_kpi_dir(project_dir, client_type) + + if (!dir.exists(kpi_dir)) { + dir.create(kpi_dir, showWarnings = FALSE, recursive = TRUE) + } + + kpis_df <- data.frame() + missing_count <- 0 + + for (weeks_back in 0:(reporting_weeks - 1)) { + target_week <- end_date - lubridate::weeks(weeks_back) + wwy <- get_iso_week_year(target_week) + + # Check for KPI file for this week + kpi_pattern <- sprintf("week_%02d_%d", wwy$week, wwy$year) + kpi_files <- list.files(kpi_dir, pattern = kpi_pattern) + has_kpis <- length(kpi_files) > 0 + file_count <- length(kpi_files) + + if (!has_kpis) missing_count <- missing_count + 1 + + kpis_df <- rbind(kpis_df, data.frame( + week = wwy$week, + year = wwy$year, + date = target_week, + has_kpis = has_kpis, + file_count = file_count + )) + } + + return(list( + kpi_dir = kpi_dir, + kpis_df = kpis_df, + missing_count = missing_count + )) +} + +# 
============================================================================== +# SECTION 8: PROJECT INITIALIZATION +# ============================================================================== + +#' Initialize the project +#' +#' Sets up directory structure, logging, and loads configuration +#' +#' @param project_dir Character. Project name +#' @param data_source Character. "merged_tif" or "merged_tif_8b" +#' @return List with all project directories and settings +initialize_project <- function(project_dir, data_source = "merged_tif") { + dirs <- setup_project_directories(project_dir, data_source = data_source) + logging <- setup_logging(dirs$log_dir) + + return(list( + dirs = dirs, + logging = logging, + project_dir = project_dir, + client_type = get_client_type(project_dir) + )) +} + +# ============================================================================== +# AUTO-INITIALIZATION: Set project_dir global if not already set +# ============================================================================== + +if (exists("project_dir")) { + assign("project_dir", project_dir, envir = .GlobalEnv) } else { warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R") } + +# ============================================================================== +# END PARAMETERS_PROJECT_2.R +# ============================================================================== diff --git a/r_app/parameters_project_OLD.R b/r_app/parameters_project_OLD.R new file mode 100644 index 0000000..a5d9224 --- /dev/null +++ b/r_app/parameters_project_OLD.R @@ -0,0 +1,1240 @@ +# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R +# +# PARAMETERS_PROJECT.R +# ==================== +# This script defines project parameters, directory structures, and loads field boundaries. 
+# It establishes all the necessary paths and creates required directories for the SmartCane project. + +# 1. Load required libraries +# ------------------------- +suppressPackageStartupMessages({ + library(here) + library(readxl) + library(sf) + library(dplyr) + library(tidyr) + library(jsonlite) # For reading tiling_config.json +}) + +# 2. Client type mapping (for conditional script execution) +# --------------------------------------------------------- +# Maps project names to client types for pipeline control +# Client types: +# - "cane_supply": Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output) +# - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report) +# - "extension_service": (Future - not yet implemented) +# +# NOTE: This will eventually migrate to Laravel environment variables/database +# For now, maintain this mapping and update as projects are added +CLIENT_TYPE_MAP <- list( + "angata" = "cane_supply", + "aura" = "agronomic_support", + "chemba" = "cane_supply", + "xinavane" = "cane_supply", + "esa" = "cane_supply" +) + +get_client_type <- function(project_name) { + client_type <- CLIENT_TYPE_MAP[[project_name]] + if (is.null(client_type)) { + warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name)) + return("cane_supply") + } + return(client_type) +} + +# 2b. 
Client-specific KPI configurations +# ---------------------------------------- +# Defines which KPIs and outputs are required for each client type +# This enables Script 80 to conditionally calculate only relevant metrics +# +# Structure: +# - kpi_calculations: Vector of KPI types to calculate for this client +# - outputs: Vector of output formats to generate (determines RDS/Excel naming) +# - requires_harvest_data: Boolean - whether Script 31 harvest predictions are needed +# - script_90_compatible: Boolean - whether output should match Script 90 expectations +# - script_91_compatible: Boolean - whether output should match Script 91 expectations +# +CLIENT_TYPE_CONFIGS <- list( + + # Aura (agronomic_support): Farm-level KPI summaries for weekly reports to agronomists + "agronomic_support" = list( + client_type = "agronomic_support", + description = "Farm-level KPI summaries for agronomic decision support", + kpi_calculations = c( + "field_uniformity", + "area_change", + "tch_forecasted", + "growth_decline", + "weed_presence", + "gap_filling" + ), + outputs = c( + "kpi_summary_tables", # Summary statistics for Script 90 report front page + "field_details" # Detailed field table for Script 90 report end section + ), + requires_harvest_data = FALSE, # Script 31 predictions not used + script_90_compatible = TRUE, # Output format matches Script 90 expectations + script_91_compatible = FALSE + ), + + # Cane Supply (cane_supply): Per-field analysis with harvest timing prediction + "cane_supply" = list( + client_type = "cane_supply", + description = "Per-field analysis with harvest prediction and phase assignment", + kpi_calculations = c( + "per_field_analysis", # Use 80_weekly_stats_utils.R for field-level statistics + "phase_assignment", # Assign growth phases (Germination, Tillering, Grand Growth, Maturation) + "harvest_prediction", # Include Script 31 harvest age predictions if available + "status_triggers" # Calculate field status (Normal, Monitor, Alert, Urgent) + 
), + outputs = c( + "field_analysis_excel", # Excel file with per-field metrics + "field_analysis_summary" # Summary RDS for Script 91 report + ), + requires_harvest_data = TRUE, # harvest.xlsx is required for phase assignment + script_90_compatible = FALSE, + script_91_compatible = TRUE + ) +) + +#' Get KPI configuration for a specific client type +#' @param client_type Character string of client type (e.g., "agronomic_support", "cane_supply") +#' @return List containing configuration for that client type +get_client_kpi_config <- function(client_type) { + config <- CLIENT_TYPE_CONFIGS[[client_type]] + + if (is.null(config)) { + warning(sprintf("Client type '%s' not in CLIENT_TYPE_CONFIGS - defaulting to 'cane_supply'", client_type)) + return(CLIENT_TYPE_CONFIGS[["cane_supply"]]) + } + + return(config) +} + +# 3. Smart detection for tile-based vs single-file mosaic approach +# ---------------------------------------------------------------- +detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { + # PRIORITY 1: Check for tiling_config.json metadata file from script 10 + # This is the most reliable source since script 10 explicitly records its decision + + if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) { + # Try to find tiling_config.json in any grid-size subfolder + config_files <- list.files(daily_tiles_split_dir, + pattern = "tiling_config\\.json$", + recursive = TRUE, + full.names = TRUE) + + if (length(config_files) > 0) { + # Found a config file - use the most recent one + config_file <- config_files[which.max(file.info(config_files)$mtime)] + + tryCatch({ + config_json <- jsonlite::read_json(config_file) + return(list( + has_tiles = config_json$has_tiles %||% TRUE, + detected_tiles = character(), + total_files = 0, + source = "tiling_config.json", + grid_size = config_json$grid_size %||% "unknown" + )) + }, error = function(e) { + warning("Error reading tiling_config.json: ", 
e$message) + # Fall through to file-based detection + }) + } + } + + # PRIORITY 2: File-based detection (fallback if metadata not found) + # Check if merged_final_tif/ contains tile-named files OR grid-size subdirectories + + if (!dir.exists(merged_final_tif_dir)) { + return(list( + has_tiles = FALSE, + detected_tiles = character(), + total_files = 0, + source = "directory_not_found" + )) + } + + # First check if there are grid-size subdirectories (5x5, 10x10, etc.) + # This indicates the tiles are organized: merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif + grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) + + if (length(grid_patterns) > 0) { + # Found grid-size subdirectories - tiles exist! + grid_size <- grid_patterns[1] + grid_dir <- file.path(merged_final_tif_dir, grid_size) + + # List sample tile files from the grid directory + sample_tiles <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)[1:3] + + return(list( + has_tiles = TRUE, + detected_tiles = sample_tiles, + total_files = length(sample_tiles), + source = "grid_subdirectory_detection", + grid_size = grid_size, + grid_path = grid_dir + )) + } + + # Fall back to checking for tile-named files directly in merged_final_tif + # List all .tif files in merged_final_tif + tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE) + + if (length(tif_files) == 0) { + return(list( + has_tiles = FALSE, + detected_tiles = character(), + total_files = 0, + source = "no_files_found" + )) + } + + # Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits) + # Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif + tile_pattern <- "_(\\d{2})\\.tif$" + tile_files <- tif_files[grepl(tile_pattern, tif_files)] + + has_tiles <- length(tile_files) > 0 + + return(list( + has_tiles = has_tiles, + detected_tiles = tile_files, + 
total_files = length(tif_files), + source = "file_pattern_detection" + )) +} + +# 4. Define project directory structure +# ----------------------------------- +# ============================================================================== +# CENTRALIZED PATH MANAGEMENT - setup_project_directories() +# ============================================================================== +# This function is the single source of truth for ALL file paths used across the pipeline. +# All scripts should call this function once at startup and use returned paths. +# This eliminates ~88 hardcoded file.path() calls scattered across 8 scripts. +# +# USAGE: +# paths <- setup_project_directories(project_dir) +# merged_tif_dir <- paths$merged_tif_folder +# daily_ci_dir <- paths$daily_ci_vals_dir +# kpi_output_dir <- paths$kpi_reports_dir +# +# TIERS (8-layer directory structure): +# Tier 1: Raw data (merged_tif) +# Tier 2: Per-field TIFFs (field_tiles, field_tiles_CI) +# Tier 3: CI Extraction (daily_ci_vals, cumulative_ci_vals) +# Tier 4: Growth Model (growth_model_interpolated) +# Tier 5: Mosaics (weekly_mosaic, weekly_tile_max) +# Tier 6: KPI & Reporting (kpi_reports_dir, kpi_field_stats_dir) +# Tier 7: Support (data, vrt, harvest, logs) +# Tier 8: Config & Metadata (field_boundaries_path, tiling_config_path) +# +# BENEFITS: +# ✓ Single source of truth (eliminates ~88 hardcoded file.path() calls) +# ✓ Auto-creates all directories (no scattered dir.create() calls) +# ✓ Easy to update storage structure globally +# ✓ Consistent naming across all 8 scripts +# ============================================================================== +setup_project_directories <- function(project_dir, data_source = "merged_tif") { + # =========================================================================== + # BASE DIRECTORIES (Foundation for all paths) + # =========================================================================== + laravel_storage_dir <- here("laravel_app", "storage", "app", 
project_dir) + + # =========================================================================== + # TIER 1: RAW DATA & INPUT PATHS (Script 00 - Python download output) + # =========================================================================== + merged_tif_folder <- here(laravel_storage_dir, "merged_tif") # 4-band raw GeoTIFFs from Planet + + # =========================================================================== + # TIER 2: TILING PATHS (Script 10 - Per-field tiff creation) + # =========================================================================== + # Per-field TIFF structure: field_tiles/{FIELD_NAME}/{YYYY-MM-DD}.tif + field_tiles_dir <- here(laravel_storage_dir, "field_tiles") + + # Per-field CI TIFFs (pre-computed, used by Script 40): field_tiles_CI/{FIELD_NAME}/{YYYY-MM-DD}.tif + field_tiles_ci_dir <- here(laravel_storage_dir, "field_tiles_CI") + + # Legacy tiling (for backward compatibility): daily_tiles_split/{grid_size}/{YYYY-MM-DD}/{YYYY-MM-DD}_XX.tif + daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split") + + # =========================================================================== + # TIER 3: CI EXTRACTION PATHS (Script 20 - Canopy Index calculation) + # =========================================================================== + extracted_ci_base_dir <- here(laravel_storage_dir, "Data", "extracted_ci") + + # Daily CI values (cumulative RDS): combined_CI_data.rds + daily_ci_vals_dir <- here(extracted_ci_base_dir, "daily_vals") + + # Cumulative CI across time: All_pivots_Cumulative_CI_quadrant_year_v2.rds + cumulative_ci_vals_dir <- here(extracted_ci_base_dir, "cumulative_vals") + + # Per-field CI data for Python harvest prediction (Script 21): ci_data_for_python.csv + ci_for_python_dir <- here(extracted_ci_base_dir, "ci_data_for_python") + + # =========================================================================== + # TIER 4: GROWTH MODEL PATHS (Script 30 - Interpolation & smoothing) + # 
=========================================================================== + growth_model_interpolated_dir <- here(laravel_storage_dir, "growth_model_interpolated") + + # =========================================================================== + # TIER 5: MOSAIC PATHS (Script 40 - Weekly mosaics) + # =========================================================================== + # Per-field weekly mosaics (per-field architecture): weekly_mosaic/{FIELD}/{week_XX_YYYY}.tif + weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic") + + # Tile-based weekly max (legacy): weekly_tile_max/{grid_size}/week_XX_YYYY.tif + weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max") + + # =========================================================================== + # TIER 6: KPI & REPORTING PATHS (Scripts 80, 90, 91) + # =========================================================================== + reports_dir <- here(laravel_storage_dir, "reports") + kpi_reports_dir <- here(reports_dir, "kpis") # Where Script 80 outputs KPI CSV/RDS files + kpi_field_stats_dir <- here(kpi_reports_dir, "field_stats") # Per-field KPI details + kpi_field_analysis_dir <- here(kpi_reports_dir, "field_analysis") # Field-level analysis for Script 91 + + # =========================================================================== + # TIER 7: SUPPORT PATHS (Data, VRT, Harvest) + # =========================================================================== + data_dir <- here(laravel_storage_dir, "Data") + vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction + harvest_dir <- here(data_dir, "HarvestData") # Harvest schedule data + log_dir <- here(laravel_storage_dir, "logs") # Log files + + # =========================================================================== + # TIER 8: CONFIG & METADATA PATHS + # =========================================================================== + # Field boundaries GeoJSON (same across all scripts) + 
field_boundaries_path <- here(data_dir, "pivot.geojson") + + # Tiling configuration metadata from Script 10 + tiling_config_path <- here(daily_tiles_split_dir, "tiling_config.json") + + # =========================================================================== + # CREATE ALL DIRECTORIES (once per pipeline run) + # =========================================================================== + all_dirs <- c( + # Tier 1 + merged_tif_folder, + # Tier 2 + field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir, + # Tier 3 + extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir, + # Tier 4 + growth_model_interpolated_dir, + # Tier 5 + weekly_mosaic_dir, weekly_tile_max_dir, + # Tier 6 + reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir, + # Tier 7 + data_dir, vrt_dir, harvest_dir, log_dir + ) + + for (dir_path in all_dirs) { + dir.create(dir_path, showWarnings = FALSE, recursive = TRUE) + } + + # =========================================================================== + # RETURN COMPREHENSIVE PATH LIST + # Scripts should source parameters_project.R and receive paths object like: + # paths <- setup_project_directories(project_dir) + # Then use: paths$merged_tif_folder, paths$daily_ci_vals_dir, etc. 
+ # =========================================================================== + return(list( + # PROJECT ROOT + laravel_storage_dir = laravel_storage_dir, + + # TIER 1: Raw data + merged_tif_folder = merged_tif_folder, + + # TIER 2: Per-field TIFFs + field_tiles_dir = field_tiles_dir, + field_tiles_ci_dir = field_tiles_ci_dir, + daily_tiles_split_dir = daily_tiles_split_dir, + + # TIER 3: CI Extraction + extracted_ci_base_dir = extracted_ci_base_dir, + daily_ci_vals_dir = daily_ci_vals_dir, + cumulative_ci_vals_dir = cumulative_ci_vals_dir, + ci_for_python_dir = ci_for_python_dir, + + # TIER 4: Growth Model + growth_model_interpolated_dir = growth_model_interpolated_dir, + + # TIER 5: Mosaics + weekly_mosaic_dir = weekly_mosaic_dir, + weekly_tile_max_dir = weekly_tile_max_dir, + + # TIER 6: KPI & Reporting + reports_dir = reports_dir, + kpi_reports_dir = kpi_reports_dir, + kpi_field_stats_dir = kpi_field_stats_dir, + kpi_field_analysis_dir = kpi_field_analysis_dir, + + # TIER 7: Support + data_dir = data_dir, + vrt_dir = vrt_dir, + harvest_dir = harvest_dir, + log_dir = log_dir, + + # TIER 8: Config & Metadata + field_boundaries_path = field_boundaries_path, + tiling_config_path = tiling_config_path + )) +} + +# ============================================================================== +# TIER-BY-TIER PATH REFERENCE (for setup_project_directories output) +# ============================================================================== +# +# TIER 1: RAW DATA (Script 00 - Python download) +# paths$merged_tif_folder +# └─ {YYYY-MM-DD}.tif (4-band uint16 GeoTIFFs from Planet API) +# +# TIER 2: PER-FIELD TIFFS (Script 10) +# paths$field_tiles_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif +# paths$field_tiles_ci_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif +# paths$daily_tiles_split_dir/{grid_size}/{YYYY-MM-DD}/{YYYY-MM-DD}_XX.tif (legacy) +# +# TIER 3: CI EXTRACTION (Script 20) +# paths$daily_ci_vals_dir/combined_CI_data.rds +# 
paths$cumulative_ci_vals_dir/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# paths$ci_for_python_dir/ci_data_for_python.csv (Script 21 output) +# +# TIER 4: GROWTH MODEL (Script 30) +# paths$growth_model_interpolated_dir/ (RDS files with interpolated CI) +# +# TIER 5: MOSAICS (Script 40) +# paths$weekly_mosaic_dir/{FIELD_NAME}/week_XX_YYYY.tif +# paths$weekly_tile_max_dir/{grid_size}/week_XX_YYYY_00.tif (legacy) +# +# TIER 6: KPI & REPORTING (Scripts 80, 90, 91) +# paths$kpi_reports_dir/ (KPI outputs from Script 80) +# paths$kpi_field_stats_dir/ (Per-field KPI RDS) +# paths$kpi_field_analysis_dir/ (Analysis RDS for Script 91) +# paths$reports_dir/ (Word/HTML reports) +# +# TIER 7: SUPPORT (Various scripts) +# paths$data_dir/pivot.geojson (Field boundaries) +# paths$data_dir/harvest.xlsx (Harvest schedule) +# paths$vrt_dir/ (Virtual raster files) +# paths$harvest_dir/ (Harvest predictions from Python) +# paths$log_dir/ (Pipeline logs) +# +# TIER 8: CONFIG & METADATA +# paths$field_boundaries_path (Full path to pivot.geojson) +# paths$tiling_config_path (Metadata from Script 10) +# +# ============================================================================== + +# 5. Utility Functions +# ---------------------- +# NOTE: load_field_boundaries() and load_harvesting_data() are defined in 00_common_utils.R +# to avoid duplication. They are sourced after parameters_project.R and used by all scripts. + +# 6. 
Load harvesting data +# --------------------- +load_harvesting_data <- function(data_dir) { + harvest_file <- here(data_dir, "harvest.xlsx") + + if (!file.exists(harvest_file)) { + warning(paste("Harvest data file not found at path:", harvest_file)) + return(NULL) + } + + # Helper function to parse dates with multiple format detection + parse_flexible_date <- function(x) { + if (is.na(x) || is.null(x)) return(NA_real_) + if (inherits(x, "Date")) return(x) + if (inherits(x, "POSIXct")) return(as.Date(x)) + + # If it's numeric (Excel date serial), convert directly + if (is.numeric(x)) { + return(as.Date(x, origin = "1899-12-30")) + } + + # Try character conversion with multiple formats + x_char <- as.character(x) + + # Try common formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, YYYY-MM-DD HH:MM:SS + formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S") + + for (fmt in formats) { + result <- suppressWarnings(as.Date(x_char, format = fmt)) + if (!is.na(result)) return(result) + } + + # If all else fails, return NA + return(NA) + } + + tryCatch({ + harvesting_data <- read_excel(harvest_file) %>% + dplyr::select( + c( + "field", + "sub_field", + "year", + "season_start", + "season_end", + "age", + "sub_area", + "tonnage_ha" + ) + ) %>% + mutate( + field = as.character(field), + sub_field = as.character(sub_field), + year = as.numeric(year), + season_start = sapply(season_start, parse_flexible_date), + season_end = sapply(season_end, parse_flexible_date), + season_start = as.Date(season_start, origin = "1970-01-01"), + season_end = as.Date(season_end, origin = "1970-01-01"), + age = as.numeric(age), + sub_area = as.character(sub_area), + tonnage_ha = as.numeric(tonnage_ha) + ) %>% + mutate( + season_end = case_when( + season_end > Sys.Date() ~ Sys.Date(), + is.na(season_end) ~ Sys.Date(), + TRUE ~ season_end + ), + age = round(as.numeric(season_end - season_start) / 7, 0) + ) + + return(harvesting_data) + }, error = function(e) { + warning(paste("Error loading 
harvesting data:", e$message)) + return(NULL) + }) +} + +# 5. Define logging functions globally first +# --------------------------------------- +# Create a simple default log function in case setup_logging hasn't been called yet +log_message <- function(message, level = "INFO") { + timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") + formatted_message <- paste0("[", level, "] ", timestamp, " - ", message) + cat(formatted_message, "\n") +} + +log_head <- function(list, level = "INFO") { + log_message(paste(capture.output(str(head(list))), collapse = "\n"), level) +} + +# 8. Set up full logging system with file output +# ------------------------------------------- +setup_logging <- function(log_dir) { + log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log")) + + # Create enhanced log functions + log_message <- function(message, level = "INFO") { + timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") + formatted_message <- paste0("[", level, "] ", timestamp, " - ", message) + cat(formatted_message, "\n", file = log_file, append = TRUE) + + # Also print to console for debugging + if (level %in% c("ERROR", "WARNING")) { + cat(formatted_message, "\n") + } + } + + log_head <- function(list, level = "INFO") { + log_message(paste(capture.output(str(head(list))), collapse = "\n"), level) + } + + # Update the global functions with the enhanced versions + assign("log_message", log_message, envir = .GlobalEnv) + assign("log_head", log_head, envir = .GlobalEnv) + + return(list( + log_file = log_file, + log_message = log_message, + log_head = log_head + )) +} + +# 8. 
HELPER FUNCTIONS FOR COMMON CALCULATIONS +# ----------------------------------------------- +# Centralized functions to reduce duplication across scripts + +# Get ISO week and year from a date +get_iso_week <- function(date) { + as.numeric(format(date, "%V")) +} + +get_iso_year <- function(date) { + as.numeric(format(date, "%G")) +} + +# Get both ISO week and year as a list +get_iso_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) + ) +} + +# Format week/year into a readable label +format_week_label <- function(date, separator = "_") { + wwy <- get_iso_week_year(date) + sprintf("week%02d%s%d", wwy$week, separator, wwy$year) +} + +# Auto-detect mosaic mode +# For per-field architecture, always returns "single-file" (weekly_mosaic/{FIELD}/week_*.tif) +detect_mosaic_mode <- function(project_dir) { + # Per-field architecture uses single-file mosaics organized per-field + weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(weekly_mosaic)) { + return("single-file") # Per-field structure + } + return("unknown") +} + +# Auto-detect grid size from tile directory structure +# For per-field architecture, returns "unknown" since grid-based organization is legacy +detect_grid_size <- function(project_dir) { + # Per-field architecture doesn't use grid-based organization anymore + return("unknown") +} + +# Build storage paths consistently across all scripts +get_project_storage_path <- function(project_dir, subdir = NULL) { + base <- file.path("laravel_app", "storage", "app", project_dir) + if (!is.null(subdir)) file.path(base, subdir) else base +} + +get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { + # Per-field architecture always uses weekly_mosaic (single-file, per-field organization) + get_project_storage_path(project_dir, "weekly_mosaic") +} + +get_kpi_dir <- function(project_dir, client_type) { + subdir <- if (client_type == 
"agronomic_support") "field_level" else "field_analysis" + get_project_storage_path(project_dir, file.path("reports", "kpis", subdir)) +} + +# Logging functions moved to 00_common_utils.R +# - smartcane_log() — Main logging function with level prefix +# - smartcane_debug() — Conditional debug logging +# - smartcane_warn() — Warning wrapper +# Import with: source("r_app/00_common_utils.R") + +# ============================================================================ +# PHASE 3 & 4: OPTIMIZATION & DOCUMENTATION +# ============================================================================ + +# System Constants +# ---------------- +# Define once, use everywhere + +RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" +# Used in run_full_pipeline.R for calling R scripts via system() + +# Data Source Documentation +# --------------------------- +# Explains the two satellite data formats and when to use each +# +# SmartCane uses PlanetScope imagery from Planet Labs API in two formats: +# +# 1. merged_tif (4-band): +# - Standard format: Red, Green, Blue, Near-Infrared +# - Size: ~150-200 MB per date +# - Use case: Agronomic support, general crop health monitoring +# - Projects: aura, xinavane +# - Cloud handling: Basic cloud masking from Planet metadata +# +# 2. 
merged_tif_8b (8-band with cloud confidence): +# - Enhanced format: 4-band imagery + 4-band UDM2 cloud mask +# - UDM2 bands: Clear, Snow, Shadow, Light Haze +# - Size: ~250-350 MB per date +# - Use case: Harvest prediction, supply chain optimization +# - Projects: angata, chemba, esa (cane_supply clients) +# - Cloud handling: Per-pixel cloud confidence from Planet UDM2 +# - Why: Cane supply chains need precise confidence to predict harvest dates +# (don't want to predict based on cloudy data) +# +# The system auto-detects which is available via detect_data_source() + +# Mosaic Mode Documentation +# -------------------------- +# SmartCane supports two ways to store and process weekly mosaics: +# +# 1. Single-file mosaic ("single-file"): +# - One GeoTIFF per week: weekly_mosaic/week_02_2026.tif +# - 5 bands per file: R, G, B, NIR, CI (Canopy Index) +# - Size: ~300-500 MB per week +# - Pros: Simpler file management, easier full-field visualization +# - Cons: Slower for field-specific queries, requires loading full raster +# - Best for: Agronomic support (aura) with <100 fields +# - Script 04 output: 5-band single-file mosaic +# +# 2. 
Tiled mosaic ("tiled"): +# - Grid of tiles per week: weekly_tile_max/5x5/week_02_2026_{TT}.tif +# - Example: 25 files (5×5 grid) × 5 bands = 125 individual tiffs +# - Size: ~15-20 MB per tile, organized in folders +# - Pros: Parallel processing, fast field lookups, scales to 1000+ fields +# - Cons: More file I/O, requires tile-to-field mapping metadata +# - Best for: Cane supply (angata, chemba) with 500+ fields +# - Script 04 output: Per-tile tiff files in weekly_tile_max/{grid}/ +# - Tile assignment: Field boundaries mapped to grid coordinates +# +# The system auto-detects which is available via detect_mosaic_mode() + +# Client Type Documentation +# -------------------------- +# SmartCane runs different analysis pipelines based on client_type: +# +# CLIENT_TYPE: cane_supply +# Purpose: Optimize sugar mill supply chain (harvest scheduling) +# Scripts run: 20 (CI), 21 (RDS to CSV), 30 (Growth), 31 (Harvest pred), 40 (Mosaic), 80 (KPI), 91 (Excel) +# Outputs: +# - Per-field analysis: field status, growth phase, harvest readiness +# - Excel reports (Script 91): Detailed metrics for logistics planning +# - KPI directory: reports/kpis/field_analysis/ (one RDS per week) +# Harvest data: Required (harvest.xlsx - planting dates for phase assignment) +# Data source: merged_tif_8b (uses cloud confidence for confidence) +# Mosaic mode: tiled (scales to 500+ fields) +# Projects: angata, chemba, xinavane, esa +# +# CLIENT_TYPE: agronomic_support +# Purpose: Provide weekly crop health insights to agronomists +# Scripts run: 80 (KPI), 90 (Word report) +# Outputs: +# - Farm-level KPI summaries (no per-field breakdown) +# - Word reports (Script 90): Charts and trends for agronomist decision support +# - KPI directory: reports/kpis/field_level/ (one RDS per week) +# Harvest data: Not used +# Data source: merged_tif (simpler, smaller) +# Mosaic mode: single-file (100-200 fields) +# Projects: aura +# + +# Detect data source (merged_tif vs merged_tif_8b) based on availability +# 
Always returns "merged_tif" — the data source is unified across projects after the per-field migration
+detect_data_source <- function(project_dir) {
+  # Data source is always merged_tif for consistency
+  return("merged_tif")
+}
+
+# Check KPI completeness for a reporting period
+# Returns: List with kpis_df (data.frame), missing_count, and all_complete (boolean)
+# This replaces duplicate KPI checking logic in run_full_pipeline.R (lines ~228-270, ~786-810)
+check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks_needed) {
+  kpi_dir <- get_kpi_dir(project_dir, client_type)
+
+  kpis_needed <- data.frame()
+
+  for (weeks_back in 0:(reporting_weeks_needed - 1)) {
+    check_date <- end_date - (weeks_back * 7)
+    wwy <- get_iso_week_year(check_date)
+
+    # Build week pattern and check if it exists
+    week_pattern <- sprintf("week%02d_%d", wwy$week, wwy$year)
+    files_this_week <- list.files(kpi_dir, pattern = week_pattern)
+    has_kpis <- length(files_this_week) > 0
+
+    # Track missing weeks
+    kpis_needed <- rbind(kpis_needed, data.frame(
+      week = wwy$week,
+      year = wwy$year,
+      date = check_date,
+      has_kpis = has_kpis,
+      pattern = week_pattern,
+      file_count = length(files_this_week)
+    ))
+
+    # Debug logging
+    smartcane_debug(sprintf(
+      "Week %02d/%d (%s): %s (%d files)",
+      wwy$week, wwy$year, format(check_date, "%Y-%m-%d"),
+      if (has_kpis) "✓ FOUND" else "✗ MISSING",
+      length(files_this_week)
+    ))
+  }
+
+  # Summary statistics
+  missing_count <- sum(!kpis_needed$has_kpis)
+  all_complete <- missing_count == 0
+
+  return(list(
+    kpis_df = kpis_needed,
+    kpi_dir = kpi_dir,
+    missing_count = missing_count,
+    missing_weeks = kpis_needed[!kpis_needed$has_kpis, ],
+    all_complete = all_complete
+  ))
+}
+
+# ==============================================================================
+# HELPER FUNCTIONS FOR run_full_pipeline.R PATH VERIFICATION (SC-116)
+# ==============================================================================
+# These functions replace hardcoded 
file.path() calls in run_full_pipeline.R +# with centralized, testable helper functions. Each function verifies a specific +# output directory for a pipeline stage. + +#' Get verification path for Script 31 harvest output +#' +#' @param project_dir Character. Project name (e.g., "angata", "aura") +#' @param week_num Integer. ISO week number (01-53) +#' @param year_num Integer. Year (e.g., 2026) +#' @return Character. Full path to expected harvest imminent CSV file +#' @details +#' Script 31 generates: {project}_{project}_harvest_imminent_week_{WW}_{YYYY}.csv +#' Location: laravel_app/storage/app/{project}/reports/kpis/field_stats/ +#' +get_harvest_output_path <- function(project_dir, week_num, year_num) { + file.path( + "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num) + ) +} + +#' Check if harvest output file exists for a given week +#' +#' @param project_dir Character. Project name +#' @param week_num Integer. ISO week number +#' @param year_num Integer. Year +#' @return Logical. TRUE if file exists, FALSE otherwise +#' +check_harvest_output_exists <- function(project_dir, week_num, year_num) { + path <- get_harvest_output_path(project_dir, week_num, year_num) + file.exists(path) +} + +#' Get expected output directory for a mosaic verification based on mode +#' +#' @param project_dir Character. Project name +#' @param mosaic_mode Character. Either "tiled" or "single-file" +#' @return Character. 
Full path to mosaic directory +#' +#' @details +#' Tiled: laravel_app/storage/app/{project}/weekly_tile_max/ +#' Single-file: laravel_app/storage/app/{project}/weekly_mosaic/ +#' +get_mosaic_verification_dir <- function(project_dir, mosaic_mode) { + base <- file.path("laravel_app", "storage", "app", project_dir) + + if (mosaic_mode == "tiled") { + file.path(base, "weekly_tile_max") + } else { + # Default to single-file (single-file is standard for per-field architecture) + file.path(base, "weekly_mosaic") + } +} + +#' Check if mosaic files exist for a specific week +#' +#' @param project_dir Character. Project name +#' @param week_num Integer. ISO week number +#' @param year_num Integer. Year +#' @param mosaic_mode Character. "tiled" or "single-file" +#' @return List with created (logical), file_count (int), and sample_files (char vector) +#' +check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) { + mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode) + + if (!dir.exists(mosaic_dir)) { + return(list(created = FALSE, file_count = 0, sample_files = character())) + } + + week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) + # Search recursively for per-field architecture support + mosaic_files <- list.files(mosaic_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE) + + list( + created = length(mosaic_files) > 0, + file_count = length(mosaic_files), + sample_files = head(mosaic_files, 3) # First 3 files as sample + ) +} + +# 9. 
Initialize the project +# ---------------------- +# Export project directories and settings +initialize_project <- function(project_dir, data_source = "merged_tif") { + # Set up directory structure, passing data_source to select TIF folder + dirs <- setup_project_directories(project_dir, data_source = data_source) + + # Set up logging + logging <- setup_logging(dirs$log_dir) + + # Load field boundaries + boundaries <- load_field_boundaries(dirs$data_dir) + + # Load harvesting data + harvesting_data <- load_harvesting_data(dirs$data_dir) + + # Return all initialized components + return(c( + dirs, + list( + logging = logging, + field_boundaries = boundaries$field_boundaries, + field_boundaries_sf = boundaries$field_boundaries_sf, + harvesting_data = harvesting_data + ) + )) +} + +# When script is sourced, initialize with the global project_dir variable if it exists +if (exists("project_dir")) { + # Now we can safely log before initialization + log_message(paste("Initializing project with directory:", project_dir)) + + # Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default + data_src <- if (exists("data_source")) data_source else "merged_tif" + log_message(paste("Using data source directory:", data_src)) + + project_config <- initialize_project(project_dir, data_source = data_src) + + # Expose all variables to the global environment + list2env(project_config, envir = .GlobalEnv) + + # Log project initialization completion with tile mode info + log_message(paste("Project initialized with directory:", project_dir)) + if (exists("use_tile_mosaic")) { + mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE" + log_message(paste("Mosaic mode detected:", mosaic_mode)) + if (exists("tile_detection_info") && !is.null(tile_detection_info)) { + log_message(paste(" - Detection source:", tile_detection_info$detected_source)) + log_message(paste(" - Grid size:", tile_detection_info$grid_size)) + log_message(paste(" - Detected files in 
storage:", tile_detection_info$detected_count)) + if (length(tile_detection_info$sample_tiles) > 0) { + log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", "))) + } + } + } +} else { + warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R") +} + + + +#' Safe Logging Function +#' +#' Generic logging with [LEVEL] prefix. Works standalone without any framework. +#' Consistent with SmartCane logging standard. +#' +#' @param message The message to log +#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG" +#' @return NULL (invisible, used for side effects) +#' +#' @examples +#' safe_log("Processing started", "INFO") +#' safe_log("Check input file", "WARNING") +#' safe_log("Failed to load data", "ERROR") +#' +safe_log <- function(message, level = "INFO") { + prefix <- sprintf("[%s]", level) + cat(sprintf("%s %s\n", prefix, message)) +} + +#' SmartCane Debug Logging (Conditional) +#' +#' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set. +#' Useful for development/troubleshooting without cluttering normal output. +#' +#' @param message The message to log +#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE) +#' @return NULL (invisible, used for side effects) +#' +#' @examples +#' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE +#' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs +#' +smartcane_debug <- function(message, verbose = FALSE) { + if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") { + return(invisible(NULL)) + } + safe_log(message, level = "DEBUG") +} + +#' SmartCane Warning Logging +#' +#' Logs WARN-level messages. Convenience wrapper around safe_log(). 
+#' +#' @param message The message to log +#' @return NULL (invisible, used for side effects) +#' +#' @examples +#' smartcane_warn("Check data format before proceeding") +#' +smartcane_warn <- function(message) { + safe_log(message, level = "WARN") +} + +#' Load Field Boundaries from GeoJSON +#' +#' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson). +#' Handles CRS validation and column standardization. +#' +#' @param data_dir Directory containing GeoJSON file +#' @return List with elements: +#' - field_boundaries_sf: sf (Simple Features) object +#' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback) +#' +#' @details +#' Automatically selects pivot_2.geojson for ESA project during CI extraction, +#' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries. +#' +#' @examples +#' boundaries <- load_field_boundaries("laravel_app/storage/app/angata") +#' head(boundaries$field_boundaries_sf) +#' +load_field_boundaries <- function(data_dir) { + # Choose field boundaries file based on project and script type + # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model) + # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson + use_pivot_2 <- exists("project_dir") && project_dir == "esa" && + exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03 + + if (use_pivot_2) { + field_boundaries_path <- file.path(data_dir, "pivot_2.geojson") + } else { + field_boundaries_path <- file.path(data_dir, "pivot.geojson") + } + + if (!file.exists(field_boundaries_path)) { + stop(paste("Field boundaries file not found at path:", field_boundaries_path)) + } + + tryCatch({ + # Read GeoJSON with explicit CRS handling + field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE) + + # Remove OBJECTID column immediately if it exists + if ("OBJECTID" %in% names(field_boundaries_sf)) { + field_boundaries_sf <- 
field_boundaries_sf %>% select(-OBJECTID) + } + + # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.) + # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.) + # to prevent S2 geometry validation errors during downstream processing + field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf) + + # Validate and fix CRS if needed + tryCatch({ + # Simply assign WGS84 if not already set (safe approach) + if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) { + st_crs(field_boundaries_sf) <- 4326 + warning("CRS was missing, assigned WGS84 (EPSG:4326)") + } + }, error = function(e) { + tryCatch({ + st_crs(field_boundaries_sf) <<- 4326 + }, error = function(e2) { + warning(paste("Could not set CRS:", e2$message)) + }) + }) + + # Handle column names - accommodate optional sub_area column + if ("sub_area" %in% names(field_boundaries_sf)) { + field_boundaries_sf <- field_boundaries_sf %>% + dplyr::select(field, sub_field, sub_area) %>% + sf::st_set_geometry("geometry") + } else { + field_boundaries_sf <- field_boundaries_sf %>% + dplyr::select(field, sub_field) %>% + sf::st_set_geometry("geometry") + } + + # Convert to terra vector if possible, otherwise use sf + field_boundaries <- tryCatch({ + field_boundaries_terra <- terra::vect(field_boundaries_sf) + crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL) + crs_str <- if (!is.null(crs_value)) as.character(crs_value) else "" + + if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) { + terra::crs(field_boundaries_terra) <- "EPSG:4326" + warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)") + } + field_boundaries_terra + + }, error = function(e) { + warning(paste("Terra conversion failed, using sf object instead:", e$message)) + field_boundaries_sf + }) + + return(list( + field_boundaries_sf = field_boundaries_sf, + field_boundaries = field_boundaries + )) + }, error = function(e) { + 
cat("[DEBUG] Error in load_field_boundaries:\n") + cat(" Message:", e$message, "\n") + cat(" Call:", deparse(e$call), "\n") + stop(paste("Error loading field boundaries:", e$message)) + }) +} + + + +#' Generate a Sequence of Dates for Processing +#' +#' Creates a date range from start_date to end_date and extracts week/year info. +#' Used by Scripts 20, 30, 40 to determine data processing windows. +#' +#' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string) +#' @param offset Number of days to look back from end_date (e.g., 7 for one week) +#' @return A list containing: +#' - week: ISO week number of start_date +#' - year: ISO year of start_date +#' - days_filter: Vector of dates in "YYYY-MM-DD" format +#' - start_date: Start date as Date object +#' - end_date: End date as Date object +#' +#' @details +#' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return +#' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations, +#' use `lubridate::isoweek()` and `lubridate::isoyear()` instead. +#' +#' @examples +#' dates <- date_list(as.Date("2025-01-15"), offset = 7) +#' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15") +#' +#' dates <- date_list("2025-12-31", offset = 14) +#' # Handles string input and returns 14 days of data +#' +date_list <- function(end_date, offset) { + # Input validation + if (!lubridate::is.Date(end_date)) { + end_date <- as.Date(end_date) + if (is.na(end_date)) { + stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.") + } + } + + offset <- as.numeric(offset) + if (is.na(offset) || offset < 1) { + stop("Invalid offset provided. 
Expected a positive number.") + } + + # Calculate date range + offset <- offset - 1 # Adjust offset to include end_date + start_date <- end_date - lubridate::days(offset) + + # Extract ISO week and year information (from END date for reporting period) + week <- lubridate::isoweek(end_date) + year <- lubridate::isoyear(end_date) + + # Generate sequence of dates + days_filter <- seq(from = start_date, to = end_date, by = "day") + days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering + + # Log the date range + safe_log(paste("Date range generated from", start_date, "to", end_date)) + + return(list( + "week" = week, + "year" = year, + "days_filter" = days_filter, + "start_date" = start_date, + "end_date" = end_date + )) +} + +# ============================================================================== +#' Repair Invalid GeoJSON Geometries +#' +#' Fixes common geometry issues in GeoJSON/sf objects: +#' - Degenerate vertices (duplicate points) +#' - Self-intersecting polygons +#' - Invalid ring orientation +#' - Empty or NULL geometries +#' +#' Uses sf::st_make_valid() with buffer trick as fallback. +#' +#' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries +#' @return sf object with repaired geometries +#' +#' @details +#' **Why this matters:** +#' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting +#' rings from manual editing or GIS data sources. These cause errors when using +#' S2 geometry (strict validation) during cropping operations. +#' +#' **Repair strategy (priority order):** +#' 1. Try st_make_valid() - GEOS-based repair (most reliable) +#' 2. Fallback: st_union() + buffer(0) - Forces polygon validity +#' 3. 
Last resort: Silently keep original if repair fails +#' +#' @examples +#' \dontrun{ +#' fields <- st_read("pivot.geojson") +#' fields_fixed <- repair_geojson_geometries(fields) +#' cat(paste("Fixed geometries: before=", +#' nrow(fields[!st_is_valid(fields), ]), +#' ", after=", +#' nrow(fields_fixed[!st_is_valid(fields_fixed), ]))) +#' } +#' +repair_geojson_geometries <- function(sf_object) { + if (!inherits(sf_object, "sf")) { + stop("Input must be an sf (Simple Features) object") + } + + # Count invalid geometries BEFORE repair + invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE) + + if (invalid_before == 0) { + safe_log("All geometries already valid - no repair needed", "INFO") + return(sf_object) + } + + safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING") + + # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS) + # This handles degenerate vertices, self-intersections, invalid rings while preserving all features + repaired <- tryCatch({ + # st_make_valid() on entire sf object preserves all features and attributes + repaired_geom <- sf::st_make_valid(sf_object) + + # Verify we still have the same number of rows + if (nrow(repaired_geom) != nrow(sf_object)) { + warning("st_make_valid() changed number of features - attempting row-wise repair") + + # Fallback: Repair row-by-row to maintain original structure + repaired_geom <- sf_object + for (i in seq_len(nrow(sf_object))) { + tryCatch({ + if (!sf::st_is_valid(sf_object[i, ])) { + repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ]) + } + }, error = function(e) { + safe_log(paste("Could not repair row", i, "-", e$message), "WARNING") + }) + } + } + + safe_log("✓ st_make_valid() successfully repaired geometries", "INFO") + repaired_geom + }, error = function(e) { + safe_log(paste("st_make_valid() failed:", e$message), "WARNING") + NULL + }) + + # If repair failed, keep original + if (is.null(repaired)) { + safe_log(paste("Could not 
repair", invalid_before, "invalid geometries - keeping original"), + "WARNING") + return(sf_object) + } + + # Count invalid geometries AFTER repair + invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE) + safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO") + + return(repaired) +} + +# ============================================================================== +# END 00_COMMON_UTILS.R +# ============================================================================== diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 8cabcbf..15b0c95 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -4,24 +4,6 @@ # These functions support the creation of maps, charts and report elements # for the CI_report_dashboard_planet.Rmd document. -#' Safe logging function that works whether log_message exists or not -#' -#' @param message The message to log -#' @param level The log level (default: "INFO") -#' @return NULL (used for side effects) -#' -safe_log <- function(message, level = "INFO") { - if (exists("log_message")) { - log_message(message, level) - } else { - if (level %in% c("ERROR", "WARNING")) { - warning(message) - } else { - message(message) - } - } -} - #' Creates a sub-chunk for use within RMarkdown documents #' #' @param g A ggplot object to render in the sub-chunk @@ -244,14 +226,14 @@ ci_plot <- function(pivotName, # Filter for the specific pivot AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName) - # Create crop masks for different timepoints using terra functions - singlePivot <- terra::crop(current_ci, pivotShape) %>% terra::mask(., pivotShape) - singlePivot_m1 <- terra::crop(ci_minus_1, pivotShape) %>% terra::mask(., pivotShape) - singlePivot_m2 <- terra::crop(ci_minus_2, pivotShape) %>% terra::mask(., pivotShape) + # Per-field mosaics are already cropped to field boundaries, so use directly without cropping + singlePivot <- current_ci + singlePivot_m1 <- ci_minus_1 + 
singlePivot_m2 <- ci_minus_2 - # Create difference maps - abs_CI_last_week <- terra::crop(last_week_diff, pivotShape) %>% terra::mask(., pivotShape) - abs_CI_three_week <- terra::crop(three_week_diff, pivotShape) %>% terra::mask(., pivotShape) + # Use difference maps directly (already field-specific) + abs_CI_last_week <- last_week_diff + abs_CI_three_week <- three_week_diff # Get planting date planting_date <- harvesting_data %>% @@ -822,3 +804,144 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c }) } +#' Aggregate per-field weekly mosaics into a farm-level mosaic +#' +#' Reads all per-field mosaic TIFs for a given week and merges them into a single farm-level mosaic. +#' This is used for overview maps in the report (Script 90). +#' +#' Per-field mosaics already have proper geospatial metadata (CRS, geotransform) from Script 10, +#' so terra can align them automatically without needing field boundaries or extent information. +#' +#' @param weekly_mosaic_dir Path to weekly_mosaic directory (e.g., "laravel_app/storage/app/{project}/weekly_mosaic") +#' @param target_week ISO week number (e.g., 52) +#' @param target_year ISO year (e.g., 2025) +#' @return SpatRaster object (5-band: R,G,B,NIR,CI) or NULL if no fields found +#' +#' @details +#' Per-field mosaics are located at: weekly_mosaic/{FIELD}/week_WW_YYYY.tif +#' This function: +#' 1. Finds all per-field subdirectories +#' 2. Loads each field's weekly mosaic +#' 3. Merges to a single raster using terra::mosaic() (alignment handled automatically by metadata) +#' 4. 
Returns combined 5-band raster for visualization +#' +aggregate_per_field_mosaics_to_farm_level <- function( + weekly_mosaic_dir, + target_week, + target_year +) { + + tryCatch({ + + # Validate directory exists + if (!dir.exists(weekly_mosaic_dir)) { + safe_log(paste("Weekly mosaic directory not found:", weekly_mosaic_dir), "WARNING") + return(NULL) + } + + # Find all per-field subdirectories (non-TIF files at top level) + all_items <- list.files(weekly_mosaic_dir, full.names = FALSE) + field_dirs <- all_items[ + !grepl("\\.tif$", all_items, ignore.case = TRUE) & + dir.exists(file.path(weekly_mosaic_dir, all_items)) + ] + + if (length(field_dirs) == 0) { + safe_log(paste("No per-field directories found in", weekly_mosaic_dir), "WARNING") + return(NULL) + } + + safe_log(paste("Found", length(field_dirs), "field directories. Aggregating week", + sprintf("%02d", target_week), target_year), "INFO") + + # Collect rasters from each field + raster_list <- list() + + for (field_dir in field_dirs) { + field_mosaic_path <- file.path( + weekly_mosaic_dir, + field_dir, + paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif") + ) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + r <- terra::rast(field_mosaic_path) + raster_list[[field_dir]] <- r + safe_log(paste("Loaded mosaic for field:", field_dir), "DEBUG") + }, error = function(e) { + safe_log(paste("Could not load mosaic for field", field_dir, ":", e$message), "WARNING") + }) + } + } + + if (length(raster_list) == 0) { + safe_log(paste("No field mosaics found for week", sprintf("%02d", target_week), target_year), "WARNING") + return(NULL) + } + + safe_log(paste("Successfully loaded mosaics for", length(raster_list), "fields"), "INFO") + + # Create a SpatRasterCollection and mosaic using correct terra syntax + tryCatch({ + rsrc <- terra::sprc(raster_list) + safe_log(paste("Created SpatRasterCollection with", length(raster_list), "rasters"), "DEBUG") + + # Mosaic the rasters - this merges them into a 
single continuous raster + farm_mosaic <- terra::mosaic(rsrc) + + safe_log(paste("Aggregated", length(raster_list), "per-field mosaics into farm-level mosaic"), "INFO") + + # Verify mosaic was created successfully + if (is.null(farm_mosaic)) { + stop("mosaic() returned NULL") + } + + return(farm_mosaic) + + }, error = function(e) { + safe_log(paste("Error during mosaic creation:", e$message), "ERROR") + return(NULL) + }) + + }, error = function(e) { + safe_log(paste("Error aggregating per-field mosaics:", e$message), "ERROR") + return(NULL) + }) +} + + +#' Get per-field mosaic path (new per-field architecture) +#' +#' Returns the path to a per-field weekly mosaic for direct visualization. +#' Replaces the old cropping workflow: now we load the field's own mosaic instead of cropping farm-level. +#' +#' @param weekly_mosaic_dir Path to weekly_mosaic directory +#' @param field_name Name of the field +#' @param target_week ISO week number +#' @param target_year ISO year +#' @return Path to field-specific mosaic TIF, or NULL if not found +#' +get_per_field_mosaic_path <- function( + weekly_mosaic_dir, + field_name, + target_week, + target_year +) { + + path <- file.path( + weekly_mosaic_dir, + field_name, + paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif") + ) + + if (file.exists(path)) { + return(path) + } else { + safe_log(paste("Per-field mosaic not found for field", field_name, + "week", sprintf("%02d", target_week), target_year), "WARNING") + return(NULL) + } +} + + diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index ae6ff14..65d162f 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -5,12 +5,13 @@ # 1. Python: Download Planet images # 2. R 10: Create master grid and split TIFFs # 3. R 20: CI Extraction -# 4. R 21: Convert CI RDS to CSV -# 5. R 30: Interpolate growth model +# 4. R 30: Interpolate growth model +# 5. R 21: Convert CI RDS to CSV (uses Script 30 output) # 6. 
Python 31: Harvest imminent weekly # 7. R 40: Mosaic creation # 8. R 80: Calculate KPIs -# +# 9. R 90 (Agronomic) OR R 91 (Cane Supply): Generate Word Report +# # ============================================================================== # HOW TO RUN THIS SCRIPT # ============================================================================== @@ -30,11 +31,12 @@ # ============================================================================== # *** EDIT THESE VARIABLES *** -end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date() +end_date <- as.Date("2026-02-04") # or specify: as.Date("2026-01-27") , Sys.Date() offset <- 7 # days to look back project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba" -data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist +migrate_legacy_format <- TRUE # Set to TRUE to migrate from old format (merged_tif/merged_tif_8b) to new format (field_tiles) +# *** NOTE: data_source is now unified - all projects use field_tiles after migration *** # *************************** # Format dates @@ -48,23 +50,35 @@ pipeline_success <- TRUE # ============================================================================== cat("\n========== CHECKING EXISTING OUTPUTS ==========\n") -# Check Script 10 outputs (tiled splits) -tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") -tiles_dates <- if (dir.exists(tiles_dir)) { - list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) -} else { - c() +# Check Script 10 outputs (field_tiles with per-field TIFFs) +# Script 10 outputs to field_tiles/{field_id}/{date}.tif +field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") +tiles_dates <- c() +if (dir.exists(field_tiles_dir)) { + # Get all field subdirectories + field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, 
recursive = FALSE) + if (length(field_dirs) > 0) { + # Get unique dates from all field directories + all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + tiles_dates <- unique(sub("\\.tif$", "", all_files)) + } } -cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates))) +cat(sprintf("Script 10: %d dates already tiled (field_tiles/)\n", length(tiles_dates))) -# Check Script 20 outputs (CI extraction) - daily RDS files -ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") -ci_files <- if (dir.exists(ci_daily_dir)) { - list.files(ci_daily_dir, pattern = "\\.rds$") -} else { - c() +# Check Script 20 outputs (CI extraction) - per-field CI TIFFs at field_tiles_CI/{FIELD}/{DATE}.tif +# NOTE: This is the NEW per-field format, not the old extracted_ci/ flat format +field_tiles_ci_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI") +ci_tiff_dates <- c() +if (dir.exists(field_tiles_ci_dir)) { + # Get all field subdirectories + field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs) > 0) { + # Get unique dates from all field directories (dates that have been processed through Script 20) + all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + ci_tiff_dates <- unique(sub("\\.tif$", "", all_files)) + } } -cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files))) +cat(sprintf("Script 20: %d dates already processed (field_tiles_CI/)\n", length(ci_tiff_dates))) # Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this # Instead, check if CI RDS files exist - if they do, 21 should also run @@ -72,7 +86,7 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files))) cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n") # Check Script 40 outputs (mosaics in 
weekly_tile_max/5x5) -mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") +mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") mosaic_files <- if (dir.exists(mosaic_dir)) { list.files(mosaic_dir, pattern = "\\.tif$") } else { @@ -90,9 +104,9 @@ kpi_files <- if (dir.exists(kpi_dir)) { cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files))) # Determine if scripts should run based on outputs -skip_10 <- length(tiles_dates) > 0 && !force_rerun -skip_20 <- length(ci_files) > 0 && !force_rerun -skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped +skip_10 <- FALSE # Script 10 should always run to pick up any new merged_tif files +skip_20 <- FALSE # Script 20 always runs to process dates in the current window (per-field format) +skip_21 <- FALSE # Script 21 always runs: the CSV is regenerated alongside Script 20 each run skip_40 <- length(mosaic_files) > 0 && !force_rerun skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks @@ -110,25 +124,50 @@ cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n") tryCatch({ # Setup paths base_path <- file.path("laravel_app", "storage", "app", project_dir) - merged_tifs_dir <- file.path(base_path, data_source) - # Get existing dates from raw TIFFs + # Always check merged_tif/ for existing downloads (both modes) + # merged_tif/ is where Python downloads go, before Script 10 splits to field_tiles/ + merged_tifs_dir <- file.path(base_path, "merged_tif") existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) - # Get existing dates from tiles (better indicator of completion) - existing_tile_dates <- tiles_dates + if (migrate_legacy_format) { + cat(sprintf(" Migration mode: Checking merged_tif/ for existing dates\n")) + } else { + cat(sprintf(" Production mode: Checking merged_tif/ and
field_tiles/ for existing dates\n")) + } # Find missing dates in the window + # Window: from (end_date - offset) to end_date + # Example: if end_date=2026-02-04 and offset=7, window is 2026-01-28 to 2026-02-04 (8 dates) start_date <- end_date - offset date_seq <- seq(start_date, end_date, by = "day") target_dates <- format(date_seq, "%Y-%m-%d") - # Only download if tiles don't exist yet (more reliable than checking raw TIFFs) - missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)] + # Also check field_tiles/ for dates that have already been processed through Script 10 + # field_tiles/ contains {field_id}/{date}.tif files - check which dates are present + field_tiles_dir <- file.path(base_path, "field_tiles") + processed_dates <- c() + if (dir.exists(field_tiles_dir)) { + # Get all field subdirectories + field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs) > 0) { + # Get unique dates from all field directories + all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + processed_dates <- unique(sub("\\.tif$", "", all_files)) + } + } - cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) - cat(sprintf(" Missing dates in window: %d\n", length(missing_dates))) + # Combine existing dates from both merged_tif and field_tiles + all_existing_dates <- unique(c(existing_tiff_dates, processed_dates)) + + # Compare: which target dates don't exist in merged_tif/ or field_tiles/? 
+ missing_dates <- target_dates[!(target_dates %in% all_existing_dates)] + + cat(sprintf(" Existing dates in merged_tif/: %d\n", length(existing_tiff_dates))) + cat(sprintf(" Processed dates in field_tiles/: %d\n", length(processed_dates))) + cat(sprintf(" Target window: %s to %s (%d dates)\n", start_date, end_date, length(target_dates))) + cat(sprintf(" Missing dates to download: %d\n", length(missing_dates))) # Download each missing date download_count <- 0 @@ -170,6 +209,51 @@ tryCatch({ pipeline_success <<- FALSE }) +# ============================================================================== +# MIGRATION: Move legacy format files to new format (if enabled) +# ============================================================================== +if (pipeline_success && migrate_legacy_format) { + cat("\n========== MIGRATION: MOVING LEGACY FORMAT FILES ==========\n") + tryCatch({ + base_path <- file.path("laravel_app", "storage", "app", project_dir) + + # PART 1: Move merged_tif files to field_tiles -- NOTE(review): this places {date}.tif at the field_tiles/ root, but every scan elsewhere expects field_tiles/{field_id}/{date}.tif and globs only inside field subdirectories, so these root-level files would go undetected; confirm intended layout + merged_tif_old <- file.path(base_path, "merged_tif") + field_tiles_new <- file.path(base_path, "field_tiles") + + if (dir.exists(merged_tif_old)) { + tif_files <- list.files(merged_tif_old, pattern = "\\.tif$", full.names = TRUE) + if (length(tif_files) > 0) { + dir.create(field_tiles_new, showWarnings = FALSE, recursive = TRUE) + for (file in tif_files) { + file.rename(file, file.path(field_tiles_new, basename(file))) + } + cat(sprintf("✓ Moved %d TIFF files from merged_tif/ to field_tiles/\n", length(tif_files))) + } + } + + # PART 2: Move merged_tif_final files (CI) to field_tiles_CI + merged_tif_final_old <- file.path(base_path, "merged_tif_final") + field_tiles_ci_new <- file.path(base_path, "field_tiles_CI") + + if (dir.exists(merged_tif_final_old)) { + ci_files <- list.files(merged_tif_final_old, pattern = "\\.tif$", full.names = TRUE) + if (length(ci_files) > 0) { + dir.create(field_tiles_ci_new, showWarnings = FALSE, recursive = TRUE) + for (file in ci_files)
{ + file.rename(file, file.path(field_tiles_ci_new, basename(file))) + } + cat(sprintf("✓ Moved %d CI TIFF files from merged_tif_final/ to field_tiles_CI/\n", length(ci_files))) + } + } + + cat("✓ Migration completed successfully\n") + }, error = function(e) { + cat("✗ Error in migration:", e$message, "\n") + pipeline_success <<- FALSE + }) +} + # ============================================================================== # SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs # ============================================================================== @@ -178,20 +262,38 @@ if (pipeline_success && !skip_10) { tryCatch({ # Set environment variables for the script (Script 10 uses these for filtering) assign("PROJECT", project_dir, envir = .GlobalEnv) + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + + # Count field_tiles/ dates BEFORE Script 10 runs + field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") + field_dirs_before <- c() + if (dir.exists(field_tiles_dir)) { + field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_tmp) > 0) { + all_files_before <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + field_dirs_before <- unique(sub("\\.tif$", "", all_files_before)) + } + } # Suppress verbose per-date output, show only summary sink(nullfile()) - source("r_app/10_create_master_grid_and_split_tiffs.R") + source("r_app/10_create_per_field_tiffs.R") sink() - # Verify output - tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") - if (dir.exists(tiles_dir)) { - subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) - cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs))) - } else { - cat("✓ Script 10 completed\n") + # Count field_tiles/ dates AFTER Script 10 runs + field_dirs_after <- c() + if (dir.exists(field_tiles_dir)) 
{ + field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_tmp) > 0) { + all_files_after <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + field_dirs_after <- unique(sub("\\.tif$", "", all_files_after)) + } } + + # Calculate newly added dates + newly_added <- length(field_dirs_after) - length(field_dirs_before) + cat(sprintf("✓ Script 10 completed - processed %d new dates (total: %d dates in field_tiles/)\n", max(0, newly_added), length(field_dirs_after))) }, error = function(e) { sink() cat("✗ Error in Script 10:", e$message, "\n") @@ -211,19 +313,71 @@ if (pipeline_success && !skip_20) { assign("end_date", end_date, envir = .GlobalEnv) assign("offset", offset, envir = .GlobalEnv) assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) - source("r_app/20_ci_extraction.R") - main() # Call main() to execute the script with the environment variables - - # Verify CI output was created - ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") - if (dir.exists(ci_daily_dir)) { - files <- list.files(ci_daily_dir, pattern = "\\.rds$") - cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files))) - } else { - cat("✓ Script 20 completed\n") + # If in migration mode, find all dates that need processing + if (migrate_legacy_format) { + cat("Migration mode: Finding all dates in field_tiles/ that need CI processing...\n") + + # Get all dates from field_tiles/ + field_tiles_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") + all_dates_in_tiles <- c() + if (dir.exists(field_tiles_dir_check)) { + field_dirs_tmp <- list.dirs(field_tiles_dir_check, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_tmp) > 0) { + all_files_tmp <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + all_dates_in_tiles <- 
unique(sub("\\.tif$", "", all_files_tmp)) + } + } + + # Get dates already processed in field_tiles_CI/ + field_tiles_ci_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI") + processed_ci_dates <- c() + if (dir.exists(field_tiles_ci_check)) { + field_dirs_ci <- list.dirs(field_tiles_ci_check, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_ci) > 0) { + all_files_ci <- list.files(field_dirs_ci, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + processed_ci_dates <- unique(sub("\\.tif$", "", all_files_ci)) + } + } + + # Get dates already in old RDS format + old_rds_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") + processed_rds_dates <- c() + if (dir.exists(old_rds_dir)) { + rds_files <- list.files(old_rds_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$") + processed_rds_dates <- unique(sub("\\.rds$", "", rds_files)) + } + + # Find dates missing from either location + dates_missing_ci <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_ci_dates)] + dates_missing_rds <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_rds_dates)] + dates_to_process_migration <- sort(unique(c(dates_missing_ci, dates_missing_rds))) + + cat(sprintf(" All dates in field_tiles/: %d\n", length(all_dates_in_tiles))) + cat(sprintf(" Already in field_tiles_CI/: %d\n", length(processed_ci_dates))) + cat(sprintf(" Already in extracted_ci/daily_vals/: %d\n", length(processed_rds_dates))) + cat(sprintf(" Dates needing processing: %d\n", length(dates_to_process_migration))) + + if (length(dates_to_process_migration) > 0) { + assign("dates_to_process", dates_to_process_migration, envir = .GlobalEnv) + cat(sprintf(" Will process: %s to %s\n", dates_to_process_migration[1], dates_to_process_migration[length(dates_to_process_migration)])) + } } + + source("r_app/20_ci_extraction_per_field.R") + main() + + # Verify output + field_tiles_ci_verify <- file.path("laravel_app", "storage", "app", project_dir, 
"field_tiles_CI") + tiff_count <- 0 + if (dir.exists(field_tiles_ci_verify)) { + field_dirs_verify <- list.dirs(field_tiles_ci_verify, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_verify) > 0) { + all_files_verify <- list.files(field_dirs_verify, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + tiff_count <- length(all_files_verify) + } + } + cat(sprintf("✓ Script 20 completed - %d CI TIFFs in field_tiles_CI/\n", tiff_count)) }, error = function(e) { cat("✗ Error in Script 20:", e$message, "\n") pipeline_success <<- FALSE @@ -233,7 +387,35 @@ if (pipeline_success && !skip_20) { } # ============================================================================== -# SCRIPT 21: CONVERT CI RDS TO CSV +# SCRIPT 30: INTERPOLATE GROWTH MODEL +# ============================================================================== +if (pipeline_success) { + cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/30_interpolate_growth_model.R") + main() # Call main() to execute the script with the environment variables + + # Verify interpolated output + growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") + if (dir.exists(growth_dir)) { + files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$") + cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files))) + } else { + cat("✓ Script 30 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 30:", e$message, "\n") + pipeline_success <<- FALSE + }) +} + +# ============================================================================== +# SCRIPT 21: CONVERT CI RDS TO CSV (uses Script 30 output) # ============================================================================== if 
(pipeline_success && !skip_21) { cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n") @@ -262,35 +444,6 @@ if (pipeline_success && !skip_21) { cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n") } -# ============================================================================== -# SCRIPT 30: INTERPOLATE GROWTH MODEL -# ============================================================================== -if (pipeline_success) { - cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") - tryCatch({ - # Set environment variables for the script - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) - assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) - - source("r_app/30_interpolate_growth_model.R") - main() # Call main() to execute the script with the environment variables - - # Verify interpolated output - growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") - if (dir.exists(growth_dir)) { - files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$") - cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files))) - } else { - cat("✓ Script 30 completed\n") - } - }, error = function(e) { - cat("✗ Error in Script 30:", e$message, "\n") - pipeline_success <<- FALSE - }) -} - # ============================================================================== # PYTHON 31: HARVEST IMMINENT WEEKLY # ============================================================================== @@ -335,9 +488,8 @@ if (pipeline_success && !skip_40) { assign("end_date", end_date, envir = .GlobalEnv) assign("offset", offset, envir = .GlobalEnv) assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) - source("r_app/40_mosaic_creation.R") + source("r_app/40_mosaic_creation_per_field.R") main() # Call main() to 
execute the script with the environment variables # Verify mosaic output @@ -368,7 +520,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur assign("end_date_str", end_date_str, envir = .GlobalEnv) assign("offset", offset, envir = .GlobalEnv) assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) source("r_app/80_calculate_kpis.R") main() # Call main() to execute the script with the environment variables @@ -389,6 +540,92 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur }) } +# ============================================================================== +# SCRIPT 90/91: GENERATE WORD REPORTS (CLIENT-TYPE SPECIFIC) +# ============================================================================== +if (pipeline_success) { + # Determine client type from project mapping + source("r_app/parameters_project.R") + source("r_app/00_common_utils.R") + client_type <- get_client_type(project_dir) + + if (client_type == "agronomic_support") { + # SCRIPT 90: Agronomic Support Report (for Aura) + cat("\n========== RUNNING SCRIPT 90: AGRONOMIC SUPPORT REPORT (WORD) ==========\n") + tryCatch({ + # Render the R Markdown file with parameters + # The Rmd file will load parameters_project and utilities internally + rmarkdown::render( + "r_app/90_CI_report_with_kpis_simple.Rmd", + params = list( + data_dir = project_dir, + report_date = end_date, + mail_day = "Monday", + borders = TRUE, + ci_plot_type = "both", + colorblind_friendly = FALSE, + facet_by_season = FALSE, + x_axis_unit = "days" + ), + output_file = sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str), + output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"), + quiet = FALSE, + knit_root_dir = getwd() + ) + + # Verify report was created + report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", + 
sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str)) + if (file.exists(report_file)) { + cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", basename(report_file))) + } else { + cat("⚠ Script 90 report file not found - check rendering\n") + } + }, error = function(e) { + cat("✗ Error in Script 90:", e$message, "\n") + print(e) + pipeline_success <<- FALSE + }) + } else if (client_type == "cane_supply") { + # SCRIPT 91: Cane Supply Report (for Angata, Chemba, Xinavane, ESA) + cat("\n========== RUNNING SCRIPT 91: CANE SUPPLY REPORT (WORD) ==========\n") + tryCatch({ + # Render the R Markdown file with parameters + # The Rmd file will load parameters_project and utilities internally + rmarkdown::render( + "r_app/91_CI_report_with_kpis_Angata.Rmd", + params = list( + data_dir = project_dir, + report_date = end_date, + mail_day = "Monday", + borders = TRUE, + ci_plot_type = "both", + colorblind_friendly = FALSE, + facet_by_season = FALSE, + x_axis_unit = "days" + ), + output_file = sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str), + output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"), + quiet = FALSE, + knit_root_dir = getwd() + ) + + # Verify report was created + report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", + sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str)) + if (file.exists(report_file)) { + cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", basename(report_file))) + } else { + cat("⚠ Script 91 report file not found - check rendering\n") + } + }, error = function(e) { + cat("✗ Error in Script 91:", e$message, "\n") + print(e) + pipeline_success <<- FALSE + }) + } +} + # ============================================================================== # SUMMARY # ============================================================================== @@ -401,4 +638,4 @@ if (pipeline_success) { } else { 
cat("Status: ✗ Pipeline failed - check errors above\n") } -cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n") +cat("Pipeline sequence: Python Download → R 10 → R 20 → R 30 → R 21 → Python 31 → R 40 → R 80 → R 90/91\n")