Merge branch 'backward-compat-no-tiling' into code-improvements
This commit is contained in:
commit
51d479673d
2
.github/copilot-instructions.md
vendored
2
.github/copilot-instructions.md
vendored
|
|
@ -319,4 +319,4 @@ After each major stage, verify:
|
|||
|
||||
---
|
||||
|
||||
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`. For related Linear issues (code quality, architecture docs), see SC-59, SC-60, SC-61._
|
||||
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`.
|
||||
|
|
|
|||
|
|
@ -22,9 +22,9 @@ Options:
|
|||
--resolution RES Resolution in meters (default: 3)
|
||||
--skip-merge Skip merge step (download only, keep individual tiles)
|
||||
--cleanup Delete intermediate single_images folder after merge
|
||||
--clear-singles Clear single_images_8b folder before download
|
||||
--clear-merged Clear merged_tif_8b and merged_virtual_8b folders before download
|
||||
--clear-all Clear all output folders (singles, merged, virtual) before download
|
||||
--clear-singles Clear single_images folder before download
|
||||
--clear-merged Clear merged_tif folder before download
|
||||
--clear-all Clear all output folders (singles, merged) before download
|
||||
|
||||
Examples:
|
||||
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
|
||||
|
|
@ -89,20 +89,118 @@ def setup_config():
|
|||
config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44')
|
||||
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos')
|
||||
|
||||
# BYOC collection for Planet 8-band data
|
||||
collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'
|
||||
byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True)
|
||||
|
||||
catalog = SentinelHubCatalog(config=config)
|
||||
|
||||
return config, byoc, catalog
|
||||
return config, catalog
|
||||
|
||||
|
||||
def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple:
|
||||
"""
|
||||
Auto-detect which Planet collection is available for this project.
|
||||
|
||||
Checks a week of dates (backwards from date_str) to ensure robust detection.
|
||||
If ANY date has data in the new 8-band collection, use that.
|
||||
If no dates have data in new collection, fall back to legacy 4-band.
|
||||
|
||||
Args:
|
||||
date_str: Reference date (YYYY-MM-DD)
|
||||
bbox_list: List of bounding boxes for testing
|
||||
catalog: SentinelHubCatalog instance
|
||||
date_range_days: Number of days to check backwards (default: 7)
|
||||
|
||||
Returns:
|
||||
(byoc, collection_info_dict) where byoc is DataCollection and dict contains metadata
|
||||
"""
|
||||
|
||||
new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a' # 8-band (new)
|
||||
old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c' # 4-band (legacy)
|
||||
test_bbox = bbox_list[0]
|
||||
|
||||
# Generate date range (backwards from date_str)
|
||||
try:
|
||||
ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
|
||||
except ValueError:
|
||||
print(f"⚠️ Invalid date format: {date_str}. Using today.")
|
||||
ref_date = datetime.datetime.now()
|
||||
|
||||
date_range = [
|
||||
(ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d')
|
||||
for i in range(date_range_days)
|
||||
]
|
||||
|
||||
print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...")
|
||||
print(f" Test range: {date_range[-1]} to {date_range[0]}")
|
||||
|
||||
# Try new collection first
|
||||
print(f"\n Trying 8-band collection: {new_id}")
|
||||
byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True)
|
||||
|
||||
for test_date in date_range:
|
||||
try:
|
||||
search = catalog.search(
|
||||
collection=byoc_new,
|
||||
bbox=test_bbox,
|
||||
time=(test_date, test_date),
|
||||
filter=None
|
||||
)
|
||||
tiles = list(search)
|
||||
if len(tiles) > 0:
|
||||
print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
|
||||
print(f" ✓ Using 8-band collection")
|
||||
return byoc_new, {
|
||||
'collection_id': new_id,
|
||||
'name': 'planet_data_8b',
|
||||
'bands': 4,
|
||||
'output_folder': 'merged_tif',
|
||||
'singles_folder': 'single_images'
|
||||
}
|
||||
except Exception as e:
|
||||
print(f" ⚠️ {test_date}: {str(e)[:60]}")
|
||||
|
||||
# No data in new collection, try legacy
|
||||
print(f"\n ✗ No data found in 8-band collection")
|
||||
print(f" Trying legacy 4-band collection: {old_id}")
|
||||
byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True)
|
||||
|
||||
for test_date in date_range:
|
||||
try:
|
||||
search = catalog.search(
|
||||
collection=byoc_old,
|
||||
bbox=test_bbox,
|
||||
time=(test_date, test_date),
|
||||
filter=None
|
||||
)
|
||||
tiles = list(search)
|
||||
if len(tiles) > 0:
|
||||
print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
|
||||
print(f" ✓ Using legacy 4-band collection")
|
||||
return byoc_old, {
|
||||
'collection_id': old_id,
|
||||
'name': 'planet_data',
|
||||
'bands': 4,
|
||||
'output_folder': 'merged_tif',
|
||||
'singles_folder': 'single_images'
|
||||
}
|
||||
except Exception as e:
|
||||
print(f" ⚠️ {test_date}: {str(e)[:60]}")
|
||||
|
||||
# Neither collection has data
|
||||
print(f"\n ⚠️ No data found in either collection for {date_range_days} days")
|
||||
print(f" Defaulting to 8-band collection (will attempt download anyway)")
|
||||
return byoc_new, {
|
||||
'collection_id': new_id,
|
||||
'name': 'planet_data_8b',
|
||||
'bands': 4,
|
||||
'output_folder': 'merged_tif',
|
||||
'singles_folder': 'single_images'
|
||||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
|
||||
# ============================================================================
|
||||
|
||||
EVALSCRIPT_4BAND_MASKED = """
|
||||
EVALSCRIPT_8BAND = """
|
||||
//VERSION=3
|
||||
function setup() {
|
||||
return {
|
||||
|
|
@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """
|
|||
}
|
||||
function evaluatePixel(sample) {
|
||||
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
|
||||
// This reduces output pixels and avoids NaN interpolation on client side
|
||||
if (sample.udm1 == 0) {
|
||||
// Scale reflectance: DN → [0, 1] range
|
||||
var scaledRed = 2.5 * sample.red / 10000;
|
||||
var scaledGreen = 2.5 * sample.green / 10000;
|
||||
var scaledBlue = 2.5 * sample.blue / 10000;
|
||||
var scaledNIR = 2.5 * sample.nir / 10000;
|
||||
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
|
||||
} else {
|
||||
return [NaN, NaN, NaN, NaN];
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
EVALSCRIPT_4BAND_LEGACY = """
|
||||
//VERSION=3
|
||||
function setup() {
|
||||
return {
|
||||
input: [{
|
||||
bands: ["red", "green", "blue", "nir", "udm1"],
|
||||
units: "DN"
|
||||
}],
|
||||
output: {
|
||||
bands: 4
|
||||
}
|
||||
};
|
||||
}
|
||||
function evaluatePixel(sample) {
|
||||
// Cloud masking for legacy collection (same band names as new 8-band)
|
||||
// udm1 = 0 means clear, non-zero means cloud/shadow/etc
|
||||
if (sample.udm1 == 0) {
|
||||
var scaledRed = 2.5 * sample.red / 10000;
|
||||
var scaledGreen = 2.5 * sample.green / 10000;
|
||||
var scaledBlue = 2.5 * sample.blue / 10000;
|
||||
|
|
@ -289,6 +413,7 @@ def download_tile(
|
|||
output_dir: Path,
|
||||
config,
|
||||
byoc,
|
||||
evalscript: str,
|
||||
resolution: int = 3
|
||||
) -> bool:
|
||||
"""Download a single full tile (no geometry masking = lower PU) with exponential backoff."""
|
||||
|
|
@ -300,9 +425,9 @@ def download_tile(
|
|||
try:
|
||||
size = bbox_to_dimensions(bbox, resolution=resolution)
|
||||
|
||||
# Create download request with 4-band cloud-masked evalscript (uint16)
|
||||
# Create download request with appropriate evalscript for collection
|
||||
request = SentinelHubRequest(
|
||||
evalscript=EVALSCRIPT_4BAND_MASKED,
|
||||
evalscript=evalscript,
|
||||
input_data=[
|
||||
SentinelHubRequest.input_data(
|
||||
data_collection=byoc,
|
||||
|
|
@ -350,6 +475,8 @@ def download_date(
|
|||
base_path: Path,
|
||||
config,
|
||||
byoc,
|
||||
evalscript: str,
|
||||
collection_info: dict,
|
||||
resolution: int = 3
|
||||
) -> int:
|
||||
"""
|
||||
|
|
@ -357,14 +484,14 @@ def download_date(
|
|||
Returns number of successfully downloaded tiles.
|
||||
"""
|
||||
|
||||
output_dir = base_path / 'single_images_8b' / date_str
|
||||
output_dir = base_path / collection_info['singles_folder'] / date_str
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...")
|
||||
|
||||
successful = 0
|
||||
for idx, bbox in enumerate(bbox_list, 1):
|
||||
if download_tile(date_str, bbox, output_dir, config, byoc, resolution):
|
||||
if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution):
|
||||
successful += 1
|
||||
|
||||
percentage = (idx / len(bbox_list)) * 100
|
||||
|
|
@ -385,10 +512,10 @@ def download_date(
|
|||
# MERGE FUNCTION
|
||||
# ============================================================================
|
||||
|
||||
def merge_tiles(date_str: str, base_path: Path) -> bool:
|
||||
def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool:
|
||||
"""Merge downloaded tiles into single GeoTIFF using GDAL."""
|
||||
|
||||
single_images_dir = base_path / 'single_images_8b' / date_str
|
||||
single_images_dir = base_path / collection_info['singles_folder'] / date_str
|
||||
|
||||
# Find all response.tiff files
|
||||
file_list = [str(p) for p in single_images_dir.rglob('response.tiff')]
|
||||
|
|
@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool:
|
|||
print(f" ✗ No tiles found to merge")
|
||||
return False
|
||||
|
||||
merged_tif_dir = base_path / 'merged_tif_8b'
|
||||
merged_vrt_dir = base_path / 'merged_virtual_8b'
|
||||
merged_tif_dir = base_path / collection_info['output_folder']
|
||||
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
|
||||
merged_tif_dir.mkdir(parents=True, exist_ok=True)
|
||||
merged_vrt_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
|
@ -453,9 +580,9 @@ def main():
|
|||
|
||||
# Parse arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Download Planet 8-band imagery with PU optimization'
|
||||
description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)'
|
||||
)
|
||||
parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)')
|
||||
parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)')
|
||||
parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
|
||||
parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
|
||||
parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
|
||||
|
|
@ -481,7 +608,7 @@ def main():
|
|||
date_str = datetime.date.today().strftime('%Y-%m-%d')
|
||||
|
||||
print(f"{'='*70}")
|
||||
print(f"Planet 8-Band Download - PU Optimized")
|
||||
print(f"Planet Download - Auto-Detecting Collection (PU Optimized)")
|
||||
print(f"{'='*70}")
|
||||
print(f"Project: {args.project}")
|
||||
print(f"Date: {date_str}")
|
||||
|
|
@ -489,7 +616,7 @@ def main():
|
|||
|
||||
# Setup SentinelHub
|
||||
print(f"\nSetting up SentinelHub...")
|
||||
config, byoc, catalog = setup_config()
|
||||
config, catalog = setup_config()
|
||||
print(f"✓ SentinelHub configured")
|
||||
|
||||
# Load geometries
|
||||
|
|
@ -504,15 +631,26 @@ def main():
|
|||
print(f"\n✗ No tiles intersect field geometries. Exiting.")
|
||||
sys.exit(1)
|
||||
|
||||
# Auto-detect collection and get evalscript
|
||||
byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)
|
||||
|
||||
# Get appropriate evalscript
|
||||
evalscript = EVALSCRIPT_8BAND if collection_info['bands'] == 4 and 'new' not in collection_info.get('note', '') else EVALSCRIPT_8BAND
|
||||
if '4e56d0cb' not in collection_info['collection_id']:
|
||||
evalscript = EVALSCRIPT_4BAND_LEGACY
|
||||
|
||||
print(f"\n Collection: {collection_info['name']}")
|
||||
print(f" Output folder: {collection_info['output_folder']}/")
|
||||
|
||||
# Check date availability
|
||||
print(f"\nChecking data availability...")
|
||||
print(f"\nChecking data availability for {date_str}...")
|
||||
if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
|
||||
print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
|
||||
sys.exit(0)
|
||||
|
||||
# Download tiles
|
||||
print(f"\n{'='*70}")
|
||||
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution)
|
||||
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)
|
||||
|
||||
if downloaded == 0:
|
||||
print(f"\n✗ No tiles downloaded. Exiting.")
|
||||
|
|
@ -522,20 +660,20 @@ def main():
|
|||
if not args.skip_merge:
|
||||
print(f"\n{'='*70}")
|
||||
print(f"Merging tiles...")
|
||||
if merge_tiles(date_str, base_path):
|
||||
if merge_tiles(date_str, base_path, collection_info):
|
||||
print(f"✓ Merge complete")
|
||||
|
||||
# Cleanup intermediate files
|
||||
if args.cleanup:
|
||||
print(f"\nCleaning up intermediate files...")
|
||||
import shutil
|
||||
single_images_dir = base_path / 'single_images_8b' / date_str
|
||||
merged_vrt_dir = base_path / 'merged_virtual_8b'
|
||||
single_images_dir = base_path / collection_info['singles_folder'] / date_str
|
||||
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
|
||||
|
||||
try:
|
||||
if single_images_dir.exists():
|
||||
shutil.rmtree(single_images_dir)
|
||||
print(f" ✓ Deleted {single_images_dir.name}/{date_str}")
|
||||
print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}")
|
||||
|
||||
# Clean old VRT files
|
||||
for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
|
||||
|
|
@ -549,7 +687,7 @@ def main():
|
|||
|
||||
print(f"\n{'='*70}")
|
||||
print(f"✓ Done!")
|
||||
print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}")
|
||||
print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}")
|
||||
print(f"{'='*70}")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -98,9 +98,9 @@ byoc = DataCollection.define_byoc(
|
|||
def setup_paths(project):
|
||||
"""Create and return folder paths."""
|
||||
BASE_PATH = Path('../laravel_app/storage/app') / project
|
||||
BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images_8b')
|
||||
folder_for_merged_tifs = str(BASE_PATH / 'merged_tif_8b')
|
||||
folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual_8b')
|
||||
BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images')
|
||||
folder_for_merged_tifs = str(BASE_PATH / 'merged_tif')
|
||||
folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual')
|
||||
geojson_file = Path(BASE_PATH / 'Data' / 'pivot.geojson')
|
||||
|
||||
# Create folders if missing
|
||||
|
|
|
|||
356
r_app/00_common_utils.R
Normal file
356
r_app/00_common_utils.R
Normal file
|
|
@ -0,0 +1,356 @@
|
|||
# ==============================================================================
|
||||
# # 00_COMMON_UTILS.R
|
||||
# ==============================================================================
|
||||
# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
|
||||
# #
|
||||
# # PURPOSE:
|
||||
# # Centralized location for foundational utilities used across multiple scripts.
|
||||
# # These functions have NO project knowledge, NO client-type dependencies,
|
||||
# # NO domain-specific logic.
|
||||
# #
|
||||
# # USAGE:
|
||||
# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
|
||||
# #
|
||||
# # source(here::here("r_app", "parameters_project.R")) # Config first
|
||||
# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities
|
||||
# #
|
||||
# # FUNCTIONS:
|
||||
# # 1. safe_log() — Generic logging with [LEVEL] prefix
|
||||
# # 2. smartcane_debug() — Conditional debug logging
|
||||
# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages
|
||||
# # 4. date_list() — Generate date sequences for processing windows
|
||||
# # 5. load_field_boundaries() — Load field geometries from GeoJSON
|
||||
# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects
|
||||
# #
|
||||
# # DATE FUNCTIONS (now in parameters_project.R):
|
||||
# # - get_iso_week() — Extract ISO week number from date
|
||||
# # - get_iso_year() — Extract ISO year from date
|
||||
# # - get_iso_week_year() — Extract both ISO week and year as list
|
||||
# # - format_week_label() — Format date as week/year label
|
||||
# # - load_harvesting_data() — Load harvest schedule from Excel
|
||||
# #
|
||||
# ==============================================================================
|
||||
|
||||
# # Source parameters first to get shared date utility functions
|
||||
# source("parameters_project.R")
|
||||
|
||||
# #' Safe Logging Function
|
||||
# #'
|
||||
# #' Generic logging with [LEVEL] prefix. Works standalone without any framework.
|
||||
# #' Consistent with SmartCane logging standard.
|
||||
# #'
|
||||
# #' @param message The message to log
|
||||
# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
|
||||
# #' @return NULL (invisible, used for side effects)
|
||||
# #'
|
||||
# #' @examples
|
||||
# #' safe_log("Processing started", "INFO")
|
||||
# #' safe_log("Check input file", "WARNING")
|
||||
# #' safe_log("Failed to load data", "ERROR")
|
||||
# #'
|
||||
# safe_log <- function(message, level = "INFO") {
|
||||
# prefix <- sprintf("[%s]", level)
|
||||
# cat(sprintf("%s %s\n", prefix, message))
|
||||
# }
|
||||
|
||||
# #' SmartCane Debug Logging (Conditional)
|
||||
# #'
|
||||
# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
|
||||
# #' Useful for development/troubleshooting without cluttering normal output.
|
||||
# #'
|
||||
# #' @param message The message to log
|
||||
# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
|
||||
# #' @return NULL (invisible, used for side effects)
|
||||
# #'
|
||||
# #' @examples
|
||||
# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
|
||||
# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
|
||||
# #'
|
||||
# smartcane_debug <- function(message, verbose = FALSE) {
|
||||
# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
|
||||
# return(invisible(NULL))
|
||||
# }
|
||||
# safe_log(message, level = "DEBUG")
|
||||
# }
|
||||
|
||||
# #' SmartCane Warning Logging
|
||||
# #'
|
||||
# #' Logs WARN-level messages. Convenience wrapper around safe_log().
|
||||
# #'
|
||||
# #' @param message The message to log
|
||||
# #' @return NULL (invisible, used for side effects)
|
||||
# #'
|
||||
# #' @examples
|
||||
# #' smartcane_warn("Check data format before proceeding")
|
||||
# #'
|
||||
# smartcane_warn <- function(message) {
|
||||
# safe_log(message, level = "WARN")
|
||||
# }
|
||||
|
||||
# #' Load Field Boundaries from GeoJSON
|
||||
# #'
|
||||
# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
|
||||
# #' Handles CRS validation and column standardization.
|
||||
# #'
|
||||
# #' @param data_dir Directory containing GeoJSON file
|
||||
# #' @return List with elements:
|
||||
# #' - field_boundaries_sf: sf (Simple Features) object
|
||||
# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
|
||||
# #'
|
||||
# #' @details
|
||||
# #' Automatically selects pivot_2.geojson for ESA project during CI extraction,
|
||||
# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
|
||||
# #'
|
||||
# #' @examples
|
||||
# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
|
||||
# #' head(boundaries$field_boundaries_sf)
|
||||
# #'
|
||||
# load_field_boundaries <- function(data_dir) {
|
||||
# # Choose field boundaries file based on project and script type
|
||||
# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
|
||||
# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
|
||||
# use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
|
||||
# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
|
||||
|
||||
# if (use_pivot_2) {
|
||||
# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
|
||||
# } else {
|
||||
# field_boundaries_path <- file.path(data_dir, "pivot.geojson")
|
||||
# }
|
||||
|
||||
# if (!file.exists(field_boundaries_path)) {
|
||||
# stop(paste("Field boundaries file not found at path:", field_boundaries_path))
|
||||
# }
|
||||
|
||||
# tryCatch({
|
||||
# # Read GeoJSON with explicit CRS handling
|
||||
# field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
|
||||
|
||||
# # Remove OBJECTID column immediately if it exists
|
||||
# if ("OBJECTID" %in% names(field_boundaries_sf)) {
|
||||
# field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
|
||||
# }
|
||||
|
||||
# # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.)
|
||||
# # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.)
|
||||
# # to prevent S2 geometry validation errors during downstream processing
|
||||
# field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)
|
||||
|
||||
# # Validate and fix CRS if needed
|
||||
# tryCatch({
|
||||
# # Simply assign WGS84 if not already set (safe approach)
|
||||
# if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
|
||||
# st_crs(field_boundaries_sf) <- 4326
|
||||
# warning("CRS was missing, assigned WGS84 (EPSG:4326)")
|
||||
# }
|
||||
# }, error = function(e) {
|
||||
# tryCatch({
|
||||
# st_crs(field_boundaries_sf) <<- 4326
|
||||
# }, error = function(e2) {
|
||||
# warning(paste("Could not set CRS:", e2$message))
|
||||
# })
|
||||
# })
|
||||
|
||||
# # Handle column names - accommodate optional sub_area column
|
||||
# if ("sub_area" %in% names(field_boundaries_sf)) {
|
||||
# field_boundaries_sf <- field_boundaries_sf %>%
|
||||
# dplyr::select(field, sub_field, sub_area) %>%
|
||||
# sf::st_set_geometry("geometry")
|
||||
# } else {
|
||||
# field_boundaries_sf <- field_boundaries_sf %>%
|
||||
# dplyr::select(field, sub_field) %>%
|
||||
# sf::st_set_geometry("geometry")
|
||||
# }
|
||||
|
||||
# # Convert to terra vector if possible, otherwise use sf
|
||||
# field_boundaries <- tryCatch({
|
||||
# field_boundaries_terra <- terra::vect(field_boundaries_sf)
|
||||
# crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
|
||||
# crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
|
||||
|
||||
# if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
|
||||
# terra::crs(field_boundaries_terra) <- "EPSG:4326"
|
||||
# warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
|
||||
# }
|
||||
# field_boundaries_terra
|
||||
|
||||
# }, error = function(e) {
|
||||
# warning(paste("Terra conversion failed, using sf object instead:", e$message))
|
||||
# field_boundaries_sf
|
||||
# })
|
||||
|
||||
# return(list(
|
||||
# field_boundaries_sf = field_boundaries_sf,
|
||||
# field_boundaries = field_boundaries
|
||||
# ))
|
||||
# }, error = function(e) {
|
||||
# cat("[DEBUG] Error in load_field_boundaries:\n")
|
||||
# cat(" Message:", e$message, "\n")
|
||||
# cat(" Call:", deparse(e$call), "\n")
|
||||
# stop(paste("Error loading field boundaries:", e$message))
|
||||
# })
|
||||
# }
|
||||
|
||||
|
||||
|
||||
# #' Generate a Sequence of Dates for Processing
|
||||
# #'
|
||||
# #' Creates a date range from start_date to end_date and extracts week/year info.
|
||||
# #' Used by Scripts 20, 30, 40 to determine data processing windows.
|
||||
# #'
|
||||
# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
|
||||
# #' @param offset Number of days to look back from end_date (e.g., 7 for one week)
|
||||
# #' @return A list containing:
|
||||
# #' - week: ISO week number of start_date
|
||||
# #' - year: ISO year of start_date
|
||||
# #' - days_filter: Vector of dates in "YYYY-MM-DD" format
|
||||
# #' - start_date: Start date as Date object
|
||||
# #' - end_date: End date as Date object
|
||||
# #'
|
||||
# #' @details
|
||||
# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return
|
||||
# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations,
|
||||
# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead.
|
||||
# #'
|
||||
# #' @examples
|
||||
# #' dates <- date_list(as.Date("2025-01-15"), offset = 7)
|
||||
# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
|
||||
# #'
|
||||
# #' dates <- date_list("2025-12-31", offset = 14)
|
||||
# #' # Handles string input and returns 14 days of data
|
||||
# #'
|
||||
# date_list <- function(end_date, offset) {
|
||||
# # Input validation
|
||||
# if (!lubridate::is.Date(end_date)) {
|
||||
# end_date <- as.Date(end_date)
|
||||
# if (is.na(end_date)) {
|
||||
# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
|
||||
# }
|
||||
# }
|
||||
|
||||
# offset <- as.numeric(offset)
|
||||
# if (is.na(offset) || offset < 1) {
|
||||
# stop("Invalid offset provided. Expected a positive number.")
|
||||
# }
|
||||
|
||||
# # Calculate date range
|
||||
# offset <- offset - 1 # Adjust offset to include end_date
|
||||
# start_date <- end_date - lubridate::days(offset)
|
||||
|
||||
# # Extract ISO week and year information (from END date for reporting period)
|
||||
# week <- lubridate::isoweek(end_date)
|
||||
# year <- lubridate::isoyear(end_date)
|
||||
|
||||
# # Generate sequence of dates
|
||||
# days_filter <- seq(from = start_date, to = end_date, by = "day")
|
||||
# days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
|
||||
|
||||
# # Log the date range
|
||||
# safe_log(paste("Date range generated from", start_date, "to", end_date))
|
||||
|
||||
# return(list(
|
||||
# "week" = week,
|
||||
# "year" = year,
|
||||
# "days_filter" = days_filter,
|
||||
# "start_date" = start_date,
|
||||
# "end_date" = end_date
|
||||
# ))
|
||||
# }
|
||||
|
||||
# ==============================================================================
|
||||
# #' Repair Invalid GeoJSON Geometries
|
||||
# #'
|
||||
# #' Fixes common geometry issues in GeoJSON/sf objects:
|
||||
# #' - Degenerate vertices (duplicate points)
|
||||
# #' - Self-intersecting polygons
|
||||
# #' - Invalid ring orientation
|
||||
# #' - Empty or NULL geometries
|
||||
# #'
|
||||
# #' Uses sf::st_make_valid() with buffer trick as fallback.
|
||||
# #'
|
||||
# #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
|
||||
# #' @return sf object with repaired geometries
|
||||
# #'
|
||||
# #' @details
|
||||
# #' **Why this matters:**
|
||||
# #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
|
||||
# #' rings from manual editing or GIS data sources. These cause errors when using
|
||||
# #' S2 geometry (strict validation) during cropping operations.
|
||||
# #'
|
||||
# #' **Repair strategy (priority order):**
|
||||
# #' 1. Try st_make_valid() - GEOS-based repair (most reliable)
|
||||
# #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity
|
||||
# #' 3. Last resort: Silently keep original if repair fails
|
||||
# #'
|
||||
# #' @examples
|
||||
# #' \dontrun{
|
||||
# #' fields <- st_read("pivot.geojson")
|
||||
# #' fields_fixed <- repair_geojson_geometries(fields)
|
||||
# #' cat(paste("Fixed geometries: before=",
|
||||
# #' nrow(fields[!st_is_valid(fields), ]),
|
||||
# #' ", after=",
|
||||
# #' nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
|
||||
# #' }
|
||||
# #'
|
||||
# repair_geojson_geometries <- function(sf_object) {
|
||||
# if (!inherits(sf_object, "sf")) {
|
||||
# stop("Input must be an sf (Simple Features) object")
|
||||
# }
|
||||
|
||||
# # Count invalid geometries BEFORE repair
|
||||
# invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)
|
||||
|
||||
# if (invalid_before == 0) {
|
||||
# safe_log("All geometries already valid - no repair needed", "INFO")
|
||||
# return(sf_object)
|
||||
# }
|
||||
|
||||
# safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")
|
||||
|
||||
# # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS)
|
||||
# # This handles degenerate vertices, self-intersections, invalid rings while preserving all features
|
||||
# repaired <- tryCatch({
|
||||
# # st_make_valid() on entire sf object preserves all features and attributes
|
||||
# repaired_geom <- sf::st_make_valid(sf_object)
|
||||
|
||||
# # Verify we still have the same number of rows
|
||||
# if (nrow(repaired_geom) != nrow(sf_object)) {
|
||||
# warning("st_make_valid() changed number of features - attempting row-wise repair")
|
||||
|
||||
# # Fallback: Repair row-by-row to maintain original structure
|
||||
# repaired_geom <- sf_object
|
||||
# for (i in seq_len(nrow(sf_object))) {
|
||||
# tryCatch({
|
||||
# if (!sf::st_is_valid(sf_object[i, ])) {
|
||||
# repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
|
||||
# }
|
||||
# }, error = function(e) {
|
||||
# safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
|
||||
# })
|
||||
# }
|
||||
# }
|
||||
|
||||
# safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
|
||||
# repaired_geom
|
||||
# }, error = function(e) {
|
||||
# safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
|
||||
# NULL
|
||||
# })
|
||||
|
||||
# # If repair failed, keep original
|
||||
# if (is.null(repaired)) {
|
||||
# safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
|
||||
# "WARNING")
|
||||
# return(sf_object)
|
||||
# }
|
||||
|
||||
# # Count invalid geometries AFTER repair
|
||||
# invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
|
||||
# safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")
|
||||
|
||||
# return(repaired)
|
||||
# }
|
||||
|
||||
# ==============================================================================
|
||||
# # END 00_COMMON_UTILS.R
|
||||
# ==============================================================================
|
||||
|
|
@ -1,499 +0,0 @@
|
|||
#' Combined: Create master grid and split TIFFs into tiles
|
||||
#' ====================================================================
|
||||
#'
|
||||
#' Purpose:
|
||||
#' 1. Check all daily TIFFs for matching extents
|
||||
#' 2. Create master 5×5 grid covering all TIFFs
|
||||
#' 3. Split each daily TIFF into 25 tiles using the master grid
|
||||
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
|
||||
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
|
||||
|
||||
|
||||
library(terra)
|
||||
library(sf)
|
||||
|
||||
# ============================================================================
# CONFIGURATION & COMMAND-LINE ARGUMENTS
# ============================================================================

# Parse command-line arguments for date filtering
args <- commandArgs(trailingOnly = TRUE)

# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
# Both dates are optional; NULL means "no bound" when TIFFs are filtered later.
start_date <- NULL
end_date <- NULL

if (length(args) >= 1) {
  # as.Date() raises an error for strings not in a standard unambiguous format
  start_date <- as.Date(args[1])
  cat("Filtering: start date =", as.character(start_date), "\n")
}

if (length(args) >= 2) {
  end_date <- as.Date(args[2])
  cat("Filtering: end date =", as.character(end_date), "\n")
}

# Project is hard-coded; all paths below are relative to the repository root
PROJECT <- "angata"
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")

# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
GRID_NROWS <- 5
GRID_NCOLS <- 5

# Construct grid-specific subfolder path
# NOTE(review): the label and output folder are frozen here, but step [4]
# below may later downgrade the grid to 1×1 without refreshing either one,
# so a 1×1 run still writes under "5x5" - confirm this is intended.
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)

# Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")

cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
||||
|
||||
# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
# ============================================================================

cat("\n[PART 1] Creating Master Grid\n")

# Reuse an existing master grid when present; only load pivot.geojson when a
# new grid has to be built from scratch.
cat("\n[1] Checking for existing master grid...\n")

# Check if master grid already exists
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))

if (file.exists(MASTER_GRID_PATH)) {
  cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
  master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
  # Field boundaries stay NULL here: Part 2 skips overlap filtering when the
  # grid file already exists, so pivot.geojson is not needed.
  field_boundaries_sf <- NULL # No need to load pivot.geojson
  field_boundaries_vect <- NULL

  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")

} else {
  # No existing grid - need to create one from pivot.geojson
  cat(" No existing grid found. Creating new one from pivot.geojson...\n")

  if (!file.exists(GEOJSON_PATH)) {
    stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
         "Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
         "from the same directory as a previous successful run (grid already exists).")
  }

  # Load the boundaries twice: sf for bbox filtering, terra for raster ops
  field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
  field_boundaries_vect <- terra::vect(GEOJSON_PATH)

  cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
}

# Try to find a name column (only if field_boundaries_sf exists)
# Accepted attribute names, in priority order: name, field, field_name.
if (!is.null(field_boundaries_sf)) {
  field_names <- NA
  if ("name" %in% names(field_boundaries_sf)) {
    field_names <- field_boundaries_sf$name
  } else if ("field" %in% names(field_boundaries_sf)) {
    field_names <- field_boundaries_sf$field
  } else if ("field_name" %in% names(field_boundaries_sf)) {
    field_names <- field_boundaries_sf$field_name
  } else {
    field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
  }

  cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
}
|
||||
|
||||
# Helper function: Check if a tile overlaps with any field (simple bbox overlap)
|
||||
#' Check whether a tile's extent overlaps any field geometry (bbox test only).
#'
#' Uses the separating-axis test on axis-aligned bounding boxes: two boxes
#' overlap iff their x-ranges and y-ranges both intersect. No exact geometry
#' intersection is computed, so the test may return TRUE for a tile that only
#' touches a field's bounding box, but never FALSE for a truly overlapping
#' tile - a safe over-approximation for tile filtering.
#'
#' @param tile_extent bbox-like object exposing $xmin/$xmax/$ymin/$ymax
#'   (e.g. the result of sf::st_bbox() on a tile polygon).
#' @param field_geoms sfc geometry column with the field boundaries.
#' @return TRUE if any non-empty field bbox overlaps the tile extent,
#'   FALSE otherwise. On any error, returns TRUE (fail open: keep the tile
#'   rather than silently drop data).
tile_overlaps_fields <- function(tile_extent, field_geoms) {
  tryCatch({
    # seq_along() is the idiomatic (and empty-safe) form of seq_len(length(x))
    for (i in seq_along(field_geoms)) {
      # Empty geometries have no meaningful bbox - skip them
      if (st_is_empty(field_geoms[i])) {
        next
      }

      field_bbox <- st_bbox(field_geoms[i])

      # Separating-axis test: ranges overlap unless one box lies entirely
      # left/right (x) or below/above (y) of the other
      x_overlap <- !(tile_extent$xmax < field_bbox$xmin || tile_extent$xmin > field_bbox$xmax)
      y_overlap <- !(tile_extent$ymax < field_bbox$ymin || tile_extent$ymin > field_bbox$ymax)

      if (x_overlap && y_overlap) {
        return(TRUE) # Found overlap!
      }
    }

    return(FALSE) # No overlap found

  }, error = function(e) {
    cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
    return(TRUE) # Default to including tile if there's an error
  })
}
|
||||
|
||||
cat("\n[2] Checking TIFF extents...\n")

# Enumerate the daily mosaics; sorting gives deterministic (chronological,
# given YYYY-MM-DD.tif names) processing order.
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)

# Filter by date range if specified
if (!is.null(start_date) || !is.null(end_date)) {
  cat("\nApplying date filter...\n")

  # Filenames are assumed to be YYYY-MM-DD.tif; the date is the stem.
  # NOTE(review): a non-date filename makes as.Date() return NA (or error),
  # and an NA in keep_idx would select an NA row below - confirm the folder
  # only ever contains date-named TIFFs.
  file_dates <- as.Date(sub("\\.tif$", "", tiff_files))

  if (!is.null(start_date) && !is.null(end_date)) {
    keep_idx <- file_dates >= start_date & file_dates <= end_date
    cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
  } else if (!is.null(start_date)) {
    keep_idx <- file_dates >= start_date
    cat(" From: ", as.character(start_date), "\n", sep = "")
  } else {
    keep_idx <- file_dates <= end_date
    cat(" Until: ", as.character(end_date), "\n", sep = "")
  }

  tiff_files <- tiff_files[keep_idx]
  cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
}

if (length(tiff_files) == 0) {
  stop("No TIFF files found in ", TIFF_FOLDER)
}

cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")

# Load all extents - ONE TIME, upfront
# terra::rast() only reads metadata here, not pixel data, so this loop is
# I/O-light even for many files.
extents <- list()
for (i in seq_along(tiff_files)) {
  tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
  raster <- terra::rast(tiff_path)
  ext <- terra::ext(raster)
  extents[[i]] <- ext

  # Progress indicator every 50 files
  if (i %% 50 == 0) {
    cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
  }
}

cat(" ✓ All extents loaded\n")
|
||||
|
||||
# ----------------------------------------------------------------------------
# Compare every TIFF extent against the first one (within a small tolerance)
# to decide whether all mosaics share one footprint or a covering master
# extent must be built. Only reports; `all_match` drives the message below.
# ----------------------------------------------------------------------------
cat("\n[3] Comparing extents...\n")

tolerance <- 1e-8  # degrees; extent edges closer than this count as equal
all_match <- TRUE
first_ext <- extents[[1]]

# BUGFIX: the original `for (i in 2:length(extents))` iterates c(2, 1) when
# only one TIFF exists and then fails on extents[[2]]. seq_along(...)[-1] is
# empty in that case, so a single-file run is handled correctly.
for (i in seq_along(extents)[-1]) {
  curr_ext <- extents[[i]]
  # Renamed from `match` to avoid shadowing base::match()
  exts_equal <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
    abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
    abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
    abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )
  if (!exts_equal) {
    all_match <- FALSE
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}

if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}
|
||||
|
||||
# Create master extent
# Union of all per-file extents, so the grid covers every mosaic even when
# their footprints differ slightly.
cat("\n[4] Creating master extent...\n")

master_xmin <- min(sapply(extents, function(e) e$xmin))
master_xmax <- max(sapply(extents, function(e) e$xmax))
master_ymin <- min(sapply(extents, function(e) e$ymin))
master_ymax <- max(sapply(extents, function(e) e$ymax))

# 111320 m per degree is exact only at the equator; the x (longitude) span
# shrinks with latitude, so these are rough size estimates for the heuristic
# below, not precise measurements.
x_range_m <- (master_xmax - master_xmin) * 111320
y_range_m <- (master_ymax - master_ymin) * 111320

cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
    "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")

# Auto-determine grid size based on ROI dimensions
# NOTE(review): GRID_SIZE_LABEL and OUTPUT_FOLDER were derived from the 5×5
# defaults at the top of the script and are NOT recomputed after this
# reassignment, so a 1×1 run still writes under the "5x5" subfolder.
if (x_range_m < 10000 && y_range_m < 10000) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
} else {
  cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n")
  GRID_NROWS <- 5
  GRID_NCOLS <- 5
}

N_TILES <- GRID_NROWS * GRID_NCOLS

# Check if master grid already exists
# (master_grid_file is the same path as MASTER_GRID_PATH from step [1])
cat("\n[5] Checking if master grid exists...\n")

master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))

if (file.exists(master_grid_file)) {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  cat(" Grid does not exist. Creating new master grid...\n")

  # Create 5×5 grid
  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")

  # Build the grid footprint in WGS84 (EPSG:4326) from the union extent
  master_bbox <- st_bbox(c(
    xmin = master_xmin,
    xmax = master_xmax,
    ymin = master_ymin,
    ymax = master_ymax
  ), crs = 4326)

  bbox_sf <- st_as_sfc(master_bbox)

  master_grid <- st_make_grid(
    bbox_sf,
    n = c(GRID_NCOLS, GRID_NROWS),
    what = "polygons"
  )

  # Tile IDs are zero-padded positional indices ("01", "02", ...) so
  # filenames sort naturally
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", 1:length(master_grid)),
    geometry = master_grid
  )

  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")

  # Convert to SpatVector for use in makeTiles
  master_grid_vect <- terra::vect(master_grid_sf)

  # Save master grid
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}
|
||||
|
||||
# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================

cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")

# If grid was loaded from file, it's already filtered. Skip filtering.
# NOTE(review): if the grid was just CREATED and saved in step [6] of this
# same run, MASTER_GRID_PATH now exists, so this condition is FALSE and the
# brand-new UNfiltered grid is treated as "pre-filtered" - the overlap
# filtering below appears unreachable on a first run. Confirm whether the
# check should instead remember if the grid existed before this run.
if (!file.exists(MASTER_GRID_PATH)) {
  cat("\n[7] Filtering master grid to only overlapping tiles...\n")

  # Check which tiles overlap with any field
  overlapping_tile_indices <- c()
  for (tile_idx in 1:nrow(master_grid_sf)) {
    tile_geom <- master_grid_sf[tile_idx, ]

    # Check overlap with any field (bbox-only test; over-approximates)
    if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
      overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
    }
  }

  cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
  cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")

  # Create filtered grid with only overlapping tiles
  # tile_id is rewritten from the surviving ORIGINAL grid positions so IDs
  # stay stable across the full grid layout
  filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
  filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
} else {
  cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
  # Grid was already loaded - it's already filtered
  filtered_grid_sf <- master_grid_sf
}

# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)

cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
|
||||
|
||||
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================
# Each date is processed in isolation: its tiles go to OUTPUT_FOLDER/<date>/,
# and a date whose folder already contains tiles is skipped, which makes the
# whole loop safe to interrupt and re-run.

cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")

total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0

for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  # Date string comes from the filename stem (YYYY-MM-DD.tif convention)
  date_str <- gsub("\\.tif$", "", tiff_file)

  # Create date-specific output folder
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)

  # CHECK: Skip if date already processed (RESUME-SAFE)
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]

    # Any tile at all counts as "done"; a partially-tiled date is NOT redone
    if (length(existing_tiles) > 0) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
          " (", length(existing_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next # Skip this date
    }
  }

  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1

  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)

  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")

  # Create date-specific output folder
  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }

  cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")

  # Use makeTiles with FILTERED grid (only overlapping tiles)
  # makeTiles appends a running number to the filename stem ("tile1.tif",
  # "tile2.tif", ...) - the rename loop below relies on that convention
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )

  # Rename tiles to [DATE]_[TILE_ID].tif
  # NOTE(review): assumes makeTiles emits tiles in the same order as the
  # rows of filtered_grid_sf, so tile N maps to tile_id[N] - confirm against
  # the terra::makeTiles documentation for the installed version.
  for (tile_idx in seq_along(tiles_list)) {
    source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
    tile_id <- filtered_grid_sf$tile_id[tile_idx]
    final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))

    if (file.exists(source_file)) {
      file.rename(source_file, final_file)
    }
  }

  cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + length(tiles_list)
}
|
||||
|
||||
# ============================================================================
# VERIFICATION
# ============================================================================
# Re-scan the output tree and count the tiles actually on disk, independent
# of the counters maintained during the processing loop.

cat("\n[9] Verifying output...\n")

# Count tiles per date folder
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
date_folders <- sort(date_folders[date_folders != "."])

total_tile_files <- 0
for (date_folder in date_folders) {
  tiles_in_folder <- list.files(file.path(OUTPUT_FOLDER, date_folder),
                                pattern = "\\.tif$")
  # Exclude the master grid file should it ever land inside a date folder
  tiles_in_folder <- tiles_in_folder[!grepl("master_grid", tiles_in_folder)]
  total_tile_files <- total_tile_files + length(tiles_in_folder)
  cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "")
}

# ============================================================================
# SUMMARY
# ============================================================================
# Human-readable run report; nothing below changes state on disk.

cat("\n\n========== SUMMARY ==========\n")

cat("\nGrid Configuration:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
# NOTE(review): GRID_SIZE_LABEL was fixed before the 1×1 auto-downgrade in
# step [4], so the subfolder/file names printed here may not match the grid
# dimensions printed above.
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")

cat("\nField Filtering:\n")
cat(" - Field boundaries loaded from pivot.geojson\n")
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
cat(" - Significant storage savings for sparse fields!\n")

cat("\nProcessing Summary:\n")
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}

cat("\nDirectory Structure:\n")
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat(" ├── 2024-01-15/\n")
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
cat(" │ ├── 2024-01-15_05.tif\n")
cat(" │ └── ...\n")
cat(" ├── 2024-01-16/\n")
cat(" │ └── ...\n")
cat(" └── ...\n")

cat("\n⭐ Key Benefits:\n")
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
|
||||
|
||||
# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# This metadata file is read by parameters_project.R to determine mosaic mode
# It allows script 40 to know what script 10 decided without re-computing

cat("\n[10] Writing tiling configuration metadata...\n")

config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")
# JSON is assembled by hand with paste0(); safe here because every value is a
# number, a date, or a fixed string containing no characters needing escapes.
# tolower() renders the R logical TRUE/FALSE as JSON's lowercase true/false.
config_json <- paste0(
  '{\n',
  ' "project": "', PROJECT, '",\n',
  ' "has_tiles": ', tolower(N_TILES > 1), ',\n',
  ' "grid_size": "', GRID_SIZE_LABEL, '",\n',
  ' "grid_rows": ', GRID_NROWS, ',\n',
  ' "grid_cols": ', GRID_NCOLS, ',\n',
  ' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n',
  ' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n',
  ' "created_date": "', Sys.Date(), '",\n',
  ' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n',
  '}\n'
)

writeLines(config_json, config_file)
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")

cat("\n✓ Script complete!\n")
|
||||
165
r_app/10_create_per_field_tiffs.R
Normal file
165
r_app/10_create_per_field_tiffs.R
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
# ============================================================================
|
||||
# SCRIPT 10: Create Per-Field TIFFs (Data Organization & Splitting)
|
||||
# ============================================================================
|
||||
# PURPOSE:
|
||||
# Split full-farm satellite TIFFs into per-field file structure. Supports
|
||||
# two phases: legacy data migration and ongoing new downloads. Transforms
|
||||
# single large-file architecture into per-field directory structure for
|
||||
# clean aggregation in downstream scripts (Script 20, 40, 80/90).
|
||||
#
|
||||
# INPUT DATA:
|
||||
# - Source: laravel_app/storage/app/{project}/merged_tif/ or merged_final_tif/
|
||||
# - Format: GeoTIFF (4-band RGB+NIR or 5-band with CI)
|
||||
# - Naming: {YYYY-MM-DD}.tif (full farm mosaic)
|
||||
#
|
||||
# OUTPUT DATA:
|
||||
# - Destination: laravel_app/storage/app/{project}/field_tiles/
|
||||
# - Format: GeoTIFF (4-band RGB+NIR, same as input)
|
||||
# - Structure: field_tiles/{FIELD}/{YYYY-MM-DD}.tif
|
||||
# - Naming: Per-field GeoTIFFs organized by field and date
|
||||
#
|
||||
# USAGE:
|
||||
# Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset]
|
||||
#
|
||||
# Example (Windows PowerShell):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||||
#
|
||||
# PARAMETERS:
|
||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
|
||||
# - end_date: End date for processing (YYYY-MM-DD format, default: today)
|
||||
# - offset: Days to look back (numeric, default: 7)
|
||||
#
|
||||
# CLIENT TYPES:
|
||||
# - cane_supply (ANGATA): Yes - primary data organization script
|
||||
# - agronomic_support (AURA): Yes - supports field-level analysis
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - Packages: terra, sf, here
|
||||
# - Utils files: parameters_project.R, 00_common_utils.R, 10_create_per_field_tiffs_utils.R
|
||||
# - External data: Field boundaries (pivot.geojson)
|
||||
# - Data directories: merged_tif/, field_tiles/ (created if missing)
|
||||
#
|
||||
# NOTES:
|
||||
# - Supports two-phase migration: legacy (merged_final_tif) and ongoing (merged_tif)
|
||||
# - Automatically detects and handles field boundaries from pivot.geojson
|
||||
# - Geometry validation and repair applied via st_make_valid()
|
||||
# - Critical for downstream Scripts 20, 40, and KPI calculations
|
||||
# - Creates per-field structure that enables efficient per-field processing
|
||||
#
|
||||
# RELATED ISSUES:
|
||||
# SC-111: Script 10 refactoring and geometry repair
|
||||
# SC-112: Utilities restructuring (uses 00_common_utils.R)
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
# Spatial data handling
|
||||
suppressPackageStartupMessages({
|
||||
|
||||
library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries)
|
||||
library(sf) # For spatial operations (reading field boundaries GeoJSON, masking)
|
||||
library(here) # For relative path resolution
|
||||
})
|
||||
# ==============================================================================
|
||||
# MAIN PROCESSING FUNCTION
|
||||
# ==============================================================================
|
||||
|
||||
# Entry point: parse CLI arguments, source the project utilities, then split
# the merged farm TIFFs for the requested date window into per-field TIFFs.
#
# Positional arguments (all optional):
#   1. project  - project name (default "angata")
#   2. end_date - YYYY-MM-DD end of processing window (default: today)
#   3. offset   - days to look back from end_date (default: 7)
#
# Side effects: changes the working directory, assigns project_dir/end_date/
# offset into the global environment (the sourced scripts read them there),
# writes per-field TIFFs, and terminates the process via quit().
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/)
  # This ensures all relative paths resolve correctly
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
  args <- commandArgs(trailingOnly = TRUE)

  # Parse arguments: [project] [end_date] [offset]
  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date()
  offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7

  # Make variables available to sourced files (they execute in global scope)
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("end_date", end_date_arg, envir = .GlobalEnv)
  assign("offset", offset_arg, envir = .GlobalEnv)

  # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
  # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })

  # Load Script 10-specific utilities
  tryCatch({
    source("r_app/10_create_per_field_tiffs_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message))
    stop(e)
  })

  # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign())
  # These control which dates Script 10 processes from merged_tif/
  # Window: end_date - offset days to end_date
  # Always coerce to correct types to avoid issues with lingering/inherited values
  # NOTE(review): end_date/offset are found in the global env (assigned in
  # STEP 2); reassignment inside these branches creates function-LOCAL copies
  # that shadow the globals - confirm the sourced scripts read the intended
  # value.
  if (!exists("end_date") || !inherits(end_date, "Date")) {
    end_date <- Sys.Date()
    safe_log(paste("Using default end_date:", end_date), "INFO")
  }
  if (!exists("offset") || !is.numeric(offset)) {
    offset <- 7
    safe_log(paste("Using default offset:", offset, "days"), "INFO")
  }

  # Ensure offset is numeric (in case it came in as a character string from environment)
  if (is.character(offset)) {
    offset <- as.numeric(offset)
  }

  # Calculate date window for processing
  start_date <- end_date - offset
  date_window <- seq(start_date, end_date, by = "day")
  date_window_str <- format(date_window, "%Y-%m-%d")
  safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")

  # STEP 5: Load centralized path structure (creates all directories automatically)
  paths <- setup_project_directories(project_dir)

  safe_log(paste("Project:", project_dir))
  safe_log(paste("Base path:", paths$laravel_storage_dir))
  safe_log(paste("Data dir:", paths$data_dir))

  # Load field boundaries using data_dir (not field_boundaries_path)
  # load_field_boundaries() expects a directory and builds the file path internally
  fields_data <- load_field_boundaries(paths$data_dir)
  fields <- fields_data$field_boundaries_sf

  # Define input and output directories (from centralized paths)
  merged_tif_dir <- paths$merged_tif_folder
  field_tiles_dir <- paths$field_tiles_dir
  field_tiles_ci_dir <- paths$field_tiles_ci_dir

  # PHASE 1: Process new downloads (always runs)
  # Pass field_tiles_ci_dir so it can skip dates already migrated
  # Also pass end_date and offset so only dates in window are processed
  process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
                                           end_date = end_date, offset = offset)

  safe_log("\n========================================", "INFO")
  safe_log("FINAL SUMMARY", "INFO")
  safe_log("========================================", "INFO")
  safe_log(paste("Processing: created =", process_result$total_created,
                 ", skipped =", process_result$total_skipped,
                 ", errors =", process_result$total_errors), "INFO")
  safe_log("Script 10 complete", "INFO")
  safe_log("========================================\n", "INFO")

  # NOTE(review): quit() ends the entire R session, not just this function -
  # fine under Rscript, but calling main() from an interactive session will
  # close that session. Confirm this is intended.
  quit(status = 0)
}
|
||||
|
||||
# Execute main if called from command line
|
||||
# Execute main if called from command line
# sys.nframe() == 0 means we are at the top level (i.e. run via Rscript),
# not being source()d from another script - R's analogue of Python's
# `if __name__ == "__main__"` guard.
if (sys.nframe() == 0) {
  main()
}
|
||||
306
r_app/10_create_per_field_tiffs_utils.R
Normal file
306
r_app/10_create_per_field_tiffs_utils.R
Normal file
|
|
@ -0,0 +1,306 @@
|
|||
# ==============================================================================
|
||||
# 10_CREATE_PER_FIELD_TIFFS_UTILS.R
|
||||
# ==============================================================================
|
||||
# UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation
|
||||
#
|
||||
# PURPOSE:
|
||||
# Extracted helper functions specific to Script 10 (per-field TIFF splitting).
|
||||
# These functions are domain-specific (not generic) and handle raster cropping
|
||||
# and batch processing of satellite imagery for per-field storage.
|
||||
#
|
||||
# FUNCTIONS:
|
||||
# 1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries
|
||||
# 2. process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - terra (raster operations)
|
||||
# - sf (geometry operations)
|
||||
# - 00_common_utils.R (logging via safe_log())
|
||||
#
|
||||
# USAGE:
|
||||
# source(here::here("r_app", "10_create_per_field_tiffs_utils.R"))
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
library(terra)
|
||||
library(sf)
|
||||
|
||||
# ==============================================================================
|
||||
#' Crop Single TIFF to Field Boundaries
|
||||
#'
|
||||
#' Loads a single satellite TIFF, finds overlapping field boundaries, and writes
|
||||
#' cropped per-field versions to output directory. Handles CRS reprojection and
|
||||
#' idempotent file creation (skips existing files).
|
||||
#'
|
||||
#' @param tif_path Character. Absolute path to source TIFF file (4-band or 5-band).
|
||||
#' @param tif_date Character. Date string (YYYY-MM-DD) for naming output files.
|
||||
#' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column.
|
||||
#' @param output_base_dir Character. Base directory where per-field TIFFs are stored
|
||||
#' (e.g., "field_tiles" or "field_tiles_CI").
|
||||
#'
|
||||
#' @return List with elements:
|
||||
#' - created: Integer. Number of field TIFFs created
|
||||
#' - skipped: Integer. Number of files that already existed
|
||||
#' - errors: Integer. Number of fields that failed to crop
|
||||
#'
|
||||
#' @details
|
||||
#' **Output Structure:**
|
||||
#' Creates files at: {output_base_dir}/{field_name}/{tif_date}.tif
|
||||
#'
|
||||
#' **CRS Handling:**
|
||||
#' Automatically reprojects field boundaries to match raster CRS before cropping.
|
||||
#'
|
||||
#' **Idempotency:**
|
||||
#' If output file already exists, skips writing (counted as "skipped").
|
||||
#' Safe to re-run without duplicate file creation.
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' fields <- load_field_boundaries("path/to/pivot.geojson")
|
||||
#' result <- crop_tiff_to_fields(
|
||||
#' tif_path = "merged_tif/2024-01-15.tif",
|
||||
#' tif_date = "2024-01-15",
|
||||
#' fields = fields,
|
||||
#' output_base_dir = "field_tiles"
|
||||
#' )
|
||||
#' cat(sprintf("Created: %d, Skipped: %d, Errors: %d\n",
|
||||
#' result$created, result$skipped, result$errors))
|
||||
#' }
|
||||
#'
|
||||
crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
|
||||
|
||||
created <- 0
|
||||
skipped <- 0
|
||||
errors <- 0
|
||||
|
||||
# Load raster
|
||||
if (!file.exists(tif_path)) {
|
||||
safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR")
|
||||
return(list(created = 0, skipped = 0, errors = 1))
|
||||
}
|
||||
|
||||
rast <- tryCatch({
|
||||
rast(tif_path)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("ERROR loading raster:", e$message), "ERROR")
|
||||
return(NULL)
|
||||
})
|
||||
|
||||
if (is.null(rast)) {
|
||||
return(list(created = 0, skipped = 0, errors = 1))
|
||||
}
|
||||
|
||||
# Create raster bounding box in raster CRS
|
||||
rast_bbox <- st_as_sfc(st_bbox(rast))
|
||||
st_crs(rast_bbox) <- st_crs(rast)
|
||||
|
||||
# Reproject fields to match raster CRS
|
||||
fields_reprojected <- st_transform(fields, st_crs(rast_bbox))
|
||||
|
||||
# Find which fields intersect this raster (CRITICAL: raster bbox first, then fields)
|
||||
overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE)
|
||||
overlapping_indices <- unique(unlist(overlapping_indices))
|
||||
|
||||
if (length(overlapping_indices) == 0) {
|
||||
safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO")
|
||||
return(list(created = 0, skipped = 0, errors = 0))
|
||||
}
|
||||
|
||||
# Process each overlapping field
|
||||
for (field_idx in overlapping_indices) {
|
||||
field_name <- fields$field[field_idx]
|
||||
field_geom <- fields_reprojected[field_idx, ]
|
||||
|
||||
# Create field directory
|
||||
field_dir <- file.path(output_base_dir, field_name)
|
||||
if (!dir.exists(field_dir)) {
|
||||
dir.create(field_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
|
||||
# Output file path
|
||||
output_path <- file.path(field_dir, paste0(tif_date, ".tif"))
|
||||
|
||||
# Check if file already exists (idempotent)
|
||||
if (file.exists(output_path)) {
|
||||
skipped <- skipped + 1
|
||||
next
|
||||
}
|
||||
|
||||
# Crop raster to field boundary
|
||||
tryCatch({
|
||||
field_rast <- crop(rast, field_geom)
|
||||
writeRaster(field_rast, output_path, overwrite = TRUE)
|
||||
created <- created + 1
|
||||
}, error = function(e) {
|
||||
safe_log(paste("ERROR cropping field", field_name, ":", e$message), "ERROR")
|
||||
errors <<- errors + 1
|
||||
})
|
||||
}
|
||||
|
||||
return(list(created = created, skipped = skipped, errors = errors))
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
#' Process New Merged TIFFs into Per-Field Structure
|
||||
#'
|
||||
#' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory.
|
||||
#' Crops each TIFF to field boundaries and stores results in per-field subdirectories.
|
||||
#' Supports migration mode: skips dates already processed and migrated to field_tiles_CI/.
|
||||
#'
|
||||
#' @param merged_tif_dir Character. Source directory containing raw merged TIFFs
|
||||
#' (e.g., "merged_tif/").
|
||||
#' @param field_tiles_dir Character. Target directory for per-field TIFFs
|
||||
#' (e.g., "field_tiles/").
|
||||
#' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column.
|
||||
#' @param field_tiles_ci_dir Character. Optional. Directory where migrated CI-calculated
|
||||
#' TIFFs are stored. If provided, skips dates
|
||||
#' already processed and moved to field_tiles_CI/.
|
||||
#' Default: NULL (process all dates).
|
||||
#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD).
|
||||
#' Default: NULL (process all available dates).
|
||||
#' @param offset Integer. Optional. Number of days to look back from end_date.
|
||||
#' Only used if end_date is also provided.
|
||||
#' Default: NULL (process all available dates).
|
||||
#'
|
||||
#' @return List with elements:
|
||||
#' - total_created: Integer. Total field TIFFs created across all dates
|
||||
#' - total_skipped: Integer. Total files that already existed
|
||||
#' - total_errors: Integer. Total cropping errors across all dates
|
||||
#'
|
||||
#' @details
|
||||
#' **Migration Logic:**
|
||||
#' When migration phase is active (field_tiles_CI/ contains CI-calculated TIFFs),
|
||||
#' this function detects which dates have been migrated and skips reprocessing them.
|
||||
#' This prevents redundant work during the per-field architecture transition.
|
||||
#'
|
||||
#' **Date Detection:**
|
||||
#' Finds all files in merged_tif_dir matching pattern: YYYY-MM-DD.tif
|
||||
#'
|
||||
#' **Output Structure:**
|
||||
#' Creates per-field subdirectories and stores: field_tiles/{FIELD}/{DATE}.tif
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' fields <- load_field_boundaries("path/to/pivot.geojson")
|
||||
#' result <- process_new_merged_tif(
|
||||
#' merged_tif_dir = "merged_tif",
|
||||
#' field_tiles_dir = "field_tiles",
|
||||
#' fields = fields,
|
||||
#' field_tiles_ci_dir = "field_tiles_CI"
|
||||
#' )
|
||||
#' cat(sprintf("Processing complete: created=%d, skipped=%d, errors=%d\n",
|
||||
#' result$total_created, result$total_skipped, result$total_errors))
|
||||
#' }
|
||||
#'
|
||||
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL,
|
||||
end_date = NULL, offset = NULL) {
|
||||
|
||||
safe_log("\n========================================", "INFO")
|
||||
safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
|
||||
safe_log("========================================", "INFO")
|
||||
|
||||
# Check if download directory exists
|
||||
if (!dir.exists(merged_tif_dir)) {
|
||||
safe_log("No merged_tif/ directory found - no new data to process", "WARNING")
|
||||
return(list(total_created = 0, total_skipped = 0, total_errors = 0))
|
||||
}
|
||||
|
||||
# Create output directory
|
||||
if (!dir.exists(field_tiles_dir)) {
|
||||
dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
|
||||
# Find all date-pattern TIFFs in root of merged_tif
|
||||
tiff_files <- list.files(
|
||||
merged_tif_dir,
|
||||
pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$",
|
||||
full.names = TRUE
|
||||
)
|
||||
|
||||
# FILTER by date window if end_date and offset provided
|
||||
if (!is.null(end_date) && !is.null(offset)) {
|
||||
start_date <- end_date - offset
|
||||
date_range <- seq(start_date, end_date, by = "day")
|
||||
date_range_str <- format(date_range, "%Y-%m-%d")
|
||||
|
||||
# Extract dates from filenames and filter
|
||||
tiff_dates <- gsub("\\.tif$", "", basename(tiff_files))
|
||||
tiff_files <- tiff_files[tiff_dates %in% date_range_str]
|
||||
|
||||
safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO")
|
||||
}
|
||||
|
||||
safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")
|
||||
|
||||
if (length(tiff_files) == 0) {
|
||||
safe_log("No new TIFFs found - nothing to process", "WARNING")
|
||||
return(list(total_created = 0, total_skipped = 0, total_errors = 0))
|
||||
}
|
||||
|
||||
# Process each new TIFF
|
||||
total_created <- 0
|
||||
total_skipped <- 0
|
||||
total_errors <- 0
|
||||
|
||||
for (tif_path in tiff_files) {
|
||||
tif_date <- gsub("\\.tif$", "", basename(tif_path))
|
||||
|
||||
# CHECK 1: Skip if this date was already migrated to field_tiles_CI/
|
||||
# (This means Script 20 already processed it and extracted RDS)
|
||||
if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) {
|
||||
# Check if ANY field has this date in field_tiles_CI/
|
||||
date_migrated <- FALSE
|
||||
|
||||
# Sample check: look for date in field_tiles_CI/*/DATE.tif
|
||||
sample_field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE)
|
||||
for (field_dir in sample_field_dirs) {
|
||||
potential_file <- file.path(field_dir, paste0(tif_date, ".tif"))
|
||||
if (file.exists(potential_file)) {
|
||||
date_migrated <- TRUE
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if (date_migrated) {
|
||||
safe_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)"), "INFO")
|
||||
total_skipped <- total_skipped + 1
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# CHECK 2: Skip if this date already exists in field_tiles/
|
||||
# (means this date has already been processed through Script 10)
|
||||
if (dir.exists(field_tiles_dir)) {
|
||||
date_exists_in_field_tiles <- FALSE
|
||||
|
||||
# Check if ANY field directory has this date
|
||||
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||
for (field_dir in field_dirs) {
|
||||
potential_file <- file.path(field_dir, paste0(tif_date, ".tif"))
|
||||
if (file.exists(potential_file)) {
|
||||
date_exists_in_field_tiles <- TRUE
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if (date_exists_in_field_tiles) {
|
||||
safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO")
|
||||
total_skipped <- total_skipped + 1
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
safe_log(paste("Processing:", tif_date), "INFO")
|
||||
|
||||
result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
|
||||
total_created <- total_created + result$created
|
||||
total_skipped <- total_skipped + result$skipped
|
||||
total_errors <- total_errors + result$errors
|
||||
}
|
||||
|
||||
safe_log(paste("Processing complete: created =", total_created,
|
||||
", skipped =", total_skipped, ", errors =", total_errors), "INFO")
|
||||
|
||||
return(list(total_created = total_created, total_skipped = total_skipped,
|
||||
total_errors = total_errors))
|
||||
}
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
# CI_EXTRACTION.R
|
||||
# ==============
|
||||
# This script processes satellite imagery to extract Canopy Index (CI) values for agricultural fields.
|
||||
# It handles image processing, masking, and extraction of statistics by field/sub-field.
|
||||
# Supports both 4-band and 8-band PlanetScope data with automatic band detection and cloud masking.
|
||||
#
|
||||
# Usage: Rscript 02_ci_extraction.R [end_date] [offset] [project_dir] [data_source]
|
||||
# - end_date: End date for processing (YYYY-MM-DD format)
|
||||
# - offset: Number of days to look back from end_date
|
||||
# - project_dir: Project directory name (e.g., "angata", "aura", "chemba")
|
||||
# - data_source: Data source directory - "merged_tif_8b" (default) or "merged_tif" (4-band) or "merged_final_tif"
|
||||
# If tiles exist (daily_tiles_split/), they are used automatically
|
||||
#
|
||||
# Examples:
|
||||
# # Angata 8-band data (with UDM cloud masking)
|
||||
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
|
||||
#
|
||||
# # Aura 4-band data
|
||||
# Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif
|
||||
#
|
||||
# # Auto-detects and uses tiles if available:
|
||||
# Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
|
||||
|
||||
# 1. Load required packages
|
||||
# -----------------------
|
||||
suppressPackageStartupMessages({
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(readxl)
|
||||
library(here)
|
||||
library(furrr)
|
||||
library(future)
|
||||
})
|
||||
|
||||
# 2. Process command line arguments
|
||||
# ------------------------------
|
||||
main <- function() {
|
||||
# Capture command line arguments
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
# Process end_date argument
|
||||
if (length(args) >= 1 && !is.na(args[1])) {
|
||||
end_date <- as.Date(args[1])
|
||||
if (is.na(end_date)) {
|
||||
warning("Invalid end_date provided. Using default (current date).")
|
||||
end_date <- Sys.Date()
|
||||
#end_date <- "2023-10-01"
|
||||
}
|
||||
} else {
|
||||
end_date <- Sys.Date()
|
||||
#end_date <- "2023-10-01"
|
||||
}
|
||||
|
||||
# Process offset argument
|
||||
if (length(args) >= 2 && !is.na(args[2])) {
|
||||
offset <- as.numeric(args[2])
|
||||
if (is.na(offset) || offset <= 0) {
|
||||
warning("Invalid offset provided. Using default (7 days).")
|
||||
offset <- 7
|
||||
}
|
||||
} else {
|
||||
offset <- 7
|
||||
}
|
||||
|
||||
# Process project_dir argument
|
||||
if (length(args) >= 3 && !is.na(args[3])) {
|
||||
project_dir <- as.character(args[3])
|
||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
||||
} else {
|
||||
project_dir <- "angata" # Changed default from "aura" to "esa"
|
||||
}
|
||||
|
||||
# Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif)
|
||||
if (length(args) >= 4 && !is.na(args[4])) {
|
||||
data_source <- as.character(args[4])
|
||||
# Validate data_source is a recognized option
|
||||
if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) {
|
||||
warning(paste("Data source", data_source, "not in standard list. Using as-is."))
|
||||
}
|
||||
} else if (exists("data_source", envir = .GlobalEnv)) {
|
||||
data_source <- get("data_source", envir = .GlobalEnv)
|
||||
} else {
|
||||
data_source <- "merged_tif_8b" # Default to 8-band (newer data with cloud masking)
|
||||
}
|
||||
|
||||
# Make project_dir and data_source available globally
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n",
|
||||
project_dir, format(end_date, "%Y-%m-%d"), offset, data_source))
|
||||
|
||||
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
|
||||
ci_extraction_script <- TRUE
|
||||
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
|
||||
|
||||
# 3. Initialize project configuration
|
||||
# --------------------------------
|
||||
new_project_question <- TRUE
|
||||
|
||||
cat("[DEBUG] Attempting to source r_app/parameters_project.R\n")
|
||||
tryCatch({
|
||||
source("r_app/parameters_project.R")
|
||||
cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n")
|
||||
}, error = function(e) {
|
||||
cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n")
|
||||
stop(e)
|
||||
})
|
||||
|
||||
cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n")
|
||||
tryCatch({
|
||||
source("r_app/20_ci_extraction_utils.R")
|
||||
cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n")
|
||||
}, error = function(e) {
|
||||
cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n")
|
||||
stop(e)
|
||||
})
|
||||
|
||||
|
||||
# 4. Generate date list for processing
|
||||
# ---------------------------------
|
||||
dates <- date_list(end_date, 7)
|
||||
log_message(paste("Processing data for week", dates$week, "of", dates$year))
|
||||
|
||||
# 5. Find and filter raster files by date - with grid size detection
|
||||
# -----------------------------------
|
||||
log_message("Searching for raster files")
|
||||
|
||||
# Check if tiles exist (Script 01 output) - detect grid size dynamically
|
||||
tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")
|
||||
|
||||
# Detect grid size from daily_tiles_split folder structure
|
||||
# Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc.
|
||||
grid_size <- NA
|
||||
if (dir.exists(tiles_split_base)) {
|
||||
subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
|
||||
# Look for grid size patterns like "5x5", "10x10", "20x20"
|
||||
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
||||
if (length(grid_patterns) > 0) {
|
||||
grid_size <- grid_patterns[1] # Use first grid size found
|
||||
log_message(paste("Detected grid size:", grid_size))
|
||||
}
|
||||
}
|
||||
|
||||
# Construct tile folder path with grid size
|
||||
if (!is.na(grid_size)) {
|
||||
tile_folder <- file.path(tiles_split_base, grid_size)
|
||||
} else {
|
||||
tile_folder <- tiles_split_base
|
||||
}
|
||||
|
||||
use_tiles <- dir.exists(tile_folder)
|
||||
|
||||
# Make grid_size available globally for other functions
|
||||
assign("grid_size", grid_size, envir = .GlobalEnv)
|
||||
|
||||
tryCatch({
|
||||
if (use_tiles) {
|
||||
# Use tile-based processing
|
||||
log_message(paste("Tile folder detected at", tile_folder))
|
||||
log_message("Using tile-based CI extraction")
|
||||
|
||||
# Call the tile-based extraction function
|
||||
process_ci_values_from_tiles(
|
||||
dates = dates,
|
||||
tile_folder = tile_folder,
|
||||
field_boundaries = field_boundaries,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
daily_CI_vals_dir = daily_CI_vals_dir,
|
||||
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
|
||||
merged_final_dir = merged_final,
|
||||
grid_size = grid_size
|
||||
)
|
||||
|
||||
} else {
|
||||
# Use legacy full-extent processing
|
||||
log_message("No tiles found. Using legacy full-extent approach")
|
||||
|
||||
# Use the existing utility function to find satellite images
|
||||
existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
|
||||
log_message(paste("Found", length(existing_files), "raster files for processing"))
|
||||
|
||||
# Process raster files and create VRT
|
||||
vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
|
||||
|
||||
# Process and combine CI values
|
||||
process_ci_values(dates, field_boundaries, merged_final,
|
||||
field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
|
||||
}
|
||||
|
||||
}, error = function(e) {
|
||||
log_message(paste("Error in main processing:", e$message), level = "ERROR")
|
||||
stop(e$message)
|
||||
})
|
||||
}
|
||||
|
||||
if (sys.nframe() == 0) {
|
||||
main()
|
||||
}
|
||||
238
r_app/20_ci_extraction_per_field.R
Normal file
238
r_app/20_ci_extraction_per_field.R
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
# CI_EXTRACTION_PER_FIELD.R
|
||||
# =========================
|
||||
# Script 20 (Refactored for Per-Field Architecture)
|
||||
#
|
||||
# This script reads per-field TIFFs from Script 10 output and:
|
||||
# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
|
||||
# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif
|
||||
# 3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
|
||||
#
|
||||
# Key differences from legacy Script 20:
|
||||
# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
|
||||
# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
|
||||
# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
|
||||
# - Directly extracts CI statistics per sub_field within each field
|
||||
#
|
||||
# Usage:
|
||||
# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
|
||||
# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7
|
||||
|
||||
suppressPackageStartupMessages({
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(here)
|
||||
})
|
||||
|
||||
# =============================================================================
|
||||
# Main Processing
|
||||
# =============================================================================
|
||||
|
||||
main <- function() {
|
||||
# STEP 1: Set working directory to project root (smartcane/)
|
||||
# This ensures all relative paths resolve correctly
|
||||
if (basename(getwd()) == "r_app") {
|
||||
setwd("..")
|
||||
}
|
||||
|
||||
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
|
||||
# Parse command-line arguments FIRST
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||
end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
|
||||
offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
|
||||
|
||||
# Make project_dir available globally for parameters_project.R
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
# Load parameters_project.R (provides safe_log, date_list, setup_project_directories, etc.)
|
||||
tryCatch({
|
||||
source("r_app/parameters_project.R")
|
||||
}, error = function(e) {
|
||||
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
|
||||
stop(e)
|
||||
})
|
||||
|
||||
# Load CI extraction utilities
|
||||
tryCatch({
|
||||
source("r_app/20_ci_extraction_utils.R")
|
||||
}, error = function(e) {
|
||||
cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
|
||||
stop(e)
|
||||
})
|
||||
|
||||
# STEP 3: Now all utilities are loaded, proceed with script logic
|
||||
safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
|
||||
safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
|
||||
project_dir, format(end_date, "%Y-%m-%d"), offset))
|
||||
|
||||
# Set up directory paths from parameters
|
||||
setup <- setup_project_directories(project_dir)
|
||||
|
||||
# Load field boundaries directly from field_boundaries_path in setup
|
||||
tryCatch({
|
||||
field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
|
||||
safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
|
||||
}, error = function(e) {
|
||||
safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR")
|
||||
stop(e)
|
||||
})
|
||||
|
||||
# Get list of dates to process
|
||||
# If in migration mode, dates_to_process is provided by the pipeline runner
|
||||
if (exists("dates_to_process") && !is.null(dates_to_process)) {
|
||||
# Migration mode: Use provided list of dates (process ALL available dates)
|
||||
dates_filter <- sort(dates_to_process)
|
||||
safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
|
||||
} else {
|
||||
# Normal mode: Use 7-day offset window
|
||||
dates <- date_list(end_date, offset)
|
||||
dates_filter <- dates$days_filter
|
||||
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
|
||||
dates$start_date, dates$end_date, length(dates_filter)))
|
||||
}
|
||||
|
||||
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
|
||||
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
|
||||
safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))
|
||||
|
||||
# Process each field
|
||||
if (!dir.exists(setup$field_tiles_dir)) {
|
||||
safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
|
||||
stop("Script 10 output not found. Run Script 10 first.")
|
||||
}
|
||||
|
||||
fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
|
||||
fields <- fields[fields != ""] # Remove empty strings
|
||||
|
||||
if (length(fields) == 0) {
|
||||
safe_log("No fields found in field_tiles directory", "WARNING")
|
||||
return()
|
||||
}
|
||||
|
||||
safe_log(sprintf("Found %d fields to process", length(fields)))
|
||||
|
||||
# DEBUG: Check what paths are available in setup
|
||||
safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", ")))
|
||||
safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
|
||||
safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))
|
||||
|
||||
# Use daily_ci_vals_dir for per-field daily CI output
|
||||
# Pre-create output subdirectories for all fields
|
||||
for (field in fields) {
|
||||
dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
|
||||
if (!is.null(setup$daily_ci_vals_dir)) {
|
||||
dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE)
|
||||
}
|
||||
}
|
||||
|
||||
# Process each DATE (load merged TIFF once, extract all fields from it)
|
||||
total_success <- 0
|
||||
total_error <- 0
|
||||
|
||||
for (date_str in dates_filter) {
|
||||
# Load the MERGED TIFF (farm-wide) ONCE for this date
|
||||
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
|
||||
|
||||
if (!file.exists(input_tif_merged)) {
|
||||
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
|
||||
total_error <<- total_error + 1
|
||||
next
|
||||
}
|
||||
|
||||
tryCatch({
|
||||
# Load 4-band TIFF ONCE
|
||||
raster_4band <- terra::rast(input_tif_merged)
|
||||
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
|
||||
|
||||
# Calculate CI from 4-band
|
||||
ci_raster <- calc_ci_from_raster(raster_4band)
|
||||
|
||||
# Create 5-band (R, G, B, NIR, CI)
|
||||
five_band <- c(raster_4band, ci_raster)
|
||||
|
||||
# Now process all fields from this single merged TIFF
|
||||
fields_processed_this_date <- 0
|
||||
|
||||
for (field in fields) {
|
||||
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
|
||||
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
|
||||
|
||||
# Pre-create output directories
|
||||
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
|
||||
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
|
||||
|
||||
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
|
||||
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
||||
|
||||
# MODE 3: Skip if both outputs already exist
|
||||
if (file.exists(output_tif) && file.exists(output_rds)) {
|
||||
next
|
||||
}
|
||||
|
||||
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
|
||||
if (file.exists(output_tif) && !file.exists(output_rds)) {
|
||||
tryCatch({
|
||||
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
|
||||
fields_processed_this_date <- fields_processed_this_date + 1
|
||||
}, error = function(e) {
|
||||
# Continue to next field
|
||||
})
|
||||
next
|
||||
}
|
||||
|
||||
# MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
|
||||
tryCatch({
|
||||
# Crop 5-band TIFF to field boundary
|
||||
field_geom <- field_boundaries_sf %>% filter(field == !!field)
|
||||
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
|
||||
|
||||
# Save 5-band field TIFF
|
||||
terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
|
||||
|
||||
# Extract CI statistics by sub_field (from cropped CI raster)
|
||||
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
|
||||
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
|
||||
|
||||
# Save RDS
|
||||
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
||||
saveRDS(ci_stats, output_rds)
|
||||
}
|
||||
|
||||
fields_processed_this_date <- fields_processed_this_date + 1
|
||||
|
||||
}, error = function(e) {
|
||||
# Error in individual field, continue to next
|
||||
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
|
||||
})
|
||||
}
|
||||
|
||||
# Increment success counter if at least one field succeeded
|
||||
if (fields_processed_this_date > 0) {
|
||||
total_success <<- total_success + 1
|
||||
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
|
||||
}
|
||||
|
||||
}, error = function(e) {
|
||||
total_error <<- total_error + 1
|
||||
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
|
||||
})
|
||||
}
|
||||
|
||||
# Summary
|
||||
safe_log(sprintf("\n=== Processing Complete ==="))
|
||||
safe_log(sprintf("Successfully processed: %d", total_success))
|
||||
safe_log(sprintf("Errors encountered: %d", total_error))
|
||||
|
||||
if (total_success > 0) {
|
||||
safe_log("Output files created in:")
|
||||
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
||||
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
|
||||
}
|
||||
}
|
||||
|
||||
# Execute main if called from command line
|
||||
if (sys.nframe() == 0) {
|
||||
main()
|
||||
}
|
||||
|
|
@ -6,24 +6,10 @@
|
|||
#
|
||||
# Parallel Processing: Tile-based extraction uses furrr::future_map to process
|
||||
# multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores)
|
||||
|
||||
#' Safe logging function that works whether log_message exists or not
|
||||
#'
|
||||
#' @param message The message to log
|
||||
#' @param level The log level (default: "INFO")
|
||||
#' @return NULL (used for side effects)
|
||||
#'
|
||||
safe_log <- function(message, level = "INFO") {
|
||||
if (exists("log_message")) {
|
||||
log_message(message, level)
|
||||
} else {
|
||||
if (level %in% c("ERROR", "WARNING")) {
|
||||
warning(message)
|
||||
} else {
|
||||
message(message)
|
||||
}
|
||||
}
|
||||
}
|
||||
#
|
||||
# Per-Field Functions (Script 20):
|
||||
# - calc_ci_from_raster(): Calculate CI from 4-band raster (Chlorophyll Index formula: NIR/Green - 1)
|
||||
# - extract_ci_by_subfield(): Extract per-sub_field CI statistics from raster
|
||||
|
||||
#' Generate a sequence of dates for processing
|
||||
#'
|
||||
|
|
@ -216,9 +202,10 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
|
|||
names(blue_band) <- "Blue"
|
||||
names(nir_band) <- "NIR"
|
||||
|
||||
# Calculate Canopy Index from Red, Green, NIR
|
||||
# CI = (NIR - Red) / (NIR + Red) is a common formulation
|
||||
# But using NIR/Green - 1 is also valid and more sensitive to green vegetation
|
||||
# Calculate Canopy Index from Green and NIR
|
||||
# *** CRITICAL: Use CHLOROPHYLL INDEX formula ONLY ***
|
||||
# CORRECT: CI = NIR / Green - 1 (ranges ~1-7, sensitive to active chlorophyll)
|
||||
# WRONG: Do NOT use (NIR-Red)/(NIR+Red) - that is NDVI, ranges -1 to 1, different scale
|
||||
CI <- nir_band / green_band - 1
|
||||
names(CI) <- "CI"
|
||||
|
||||
|
|
@ -294,7 +281,6 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
|
|||
|
||||
# Write output files
|
||||
terra::writeRaster(output_raster, new_file, overwrite = TRUE)
|
||||
terra::vrt(new_file, vrt_file, overwrite = TRUE)
|
||||
|
||||
# Check if the result has enough valid pixels
|
||||
valid_pixels <- terra::global(output_raster$CI, "notNA", na.rm=TRUE)
|
||||
|
|
@ -738,7 +724,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
|
|||
date = date,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
daily_CI_vals_dir = daily_CI_vals_dir,
|
||||
merged_final_tif_dir = merged_final_dir,
|
||||
grid_size = grid_size
|
||||
)
|
||||
|
||||
|
|
@ -809,7 +794,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
|
|||
date = date,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
daily_CI_vals_dir = daily_CI_vals_dir,
|
||||
merged_final_tif_dir = merged_final_dir,
|
||||
grid_size = grid_size
|
||||
)
|
||||
|
||||
|
|
@ -850,7 +834,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
|
|||
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
|
||||
#' @return Data frame with field CI statistics for this tile, or NULL if processing failed
|
||||
#'
|
||||
process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) {
|
||||
process_single_tile <- function(tile_file, field_boundaries_sf, date, grid_size = NA) {
|
||||
tryCatch({
|
||||
tile_filename <- basename(tile_file)
|
||||
safe_log(paste(" [TILE] Loading:", tile_filename))
|
||||
|
|
@ -892,25 +876,10 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
|
|||
output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band)
|
||||
names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||
|
||||
# Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename
|
||||
# This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/
|
||||
if (!is.na(grid_size)) {
|
||||
date_dir <- file.path(merged_final_tif_dir, grid_size, date)
|
||||
} else {
|
||||
date_dir <- file.path(merged_final_tif_dir, date)
|
||||
}
|
||||
|
||||
if (!dir.exists(date_dir)) {
|
||||
dir.create(date_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
|
||||
# Use same filename as source tile (e.g., 2026-01-02_01.tif)
|
||||
tile_filename <- basename(tile_file)
|
||||
output_file <- file.path(date_dir, tile_filename)
|
||||
|
||||
# Write processed tile
|
||||
terra::writeRaster(output_raster, output_file, overwrite = TRUE)
|
||||
safe_log(paste(" [SAVED TIFF] Output:", file.path(date, basename(output_file)), "(5 bands: Red, Green, Blue, NIR, CI)"))
|
||||
# NOTE: Do NOT save processed tile - it's an intermediate only
|
||||
# The purpose is to calculate field-level CI statistics, not to create permanent tile files
|
||||
# This prevents bloat in merged_final_tif/ directory (would unnecessarily duplicate
|
||||
# daily_tiles_split data with an extra CI band added)
|
||||
|
||||
# Extract statistics per field from CI band
|
||||
field_bbox <- sf::st_bbox(field_boundaries_sf)
|
||||
|
|
@ -951,7 +920,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
|
|||
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
|
||||
#' @return Data frame with field CI statistics for the date
|
||||
#'
|
||||
extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) {
|
||||
extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, grid_size = NA) {
|
||||
|
||||
if (!inherits(field_boundaries_sf, "sf")) {
|
||||
field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
|
||||
|
|
@ -970,7 +939,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
|||
stats_list <- tryCatch({
|
||||
furrr::future_map(
|
||||
tile_files,
|
||||
~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size),
|
||||
~ process_single_tile(.x, field_boundaries_sf, date, grid_size = grid_size),
|
||||
.progress = FALSE,
|
||||
.options = furrr::furrr_options(seed = TRUE)
|
||||
)
|
||||
|
|
@ -980,7 +949,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
|||
lapply(
|
||||
tile_files,
|
||||
function(tile_file) {
|
||||
process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size)
|
||||
process_single_tile(tile_file, field_boundaries_sf, date, grid_size = grid_size)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
|
@ -1013,3 +982,208 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
|||
|
||||
return(aggregated)
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Script 20 (Per-Field) Specific Functions
|
||||
# =============================================================================
|
||||
|
||||
#' Calculate Canopy Index (CI) from 4-band raster
|
||||
#'
|
||||
#' *** CRITICAL FORMULA: CI = NIR / Green - 1 ***
|
||||
#' This is the CHLOROPHYLL INDEX formula (ranges ~1-7 for vegetation).
|
||||
#' NOT NDVI! Do NOT use (NIR-Red)/(NIR+Red) - that produces -1 to 1 range.
|
||||
#'
|
||||
#' Expects band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4)
|
||||
#'
|
||||
#' @param raster_obj Loaded raster object with at least 4 bands
|
||||
#' @return Raster object containing CI values (Chlorophyll Index, ranges ~1-7)
|
||||
#'
|
||||
calc_ci_from_raster <- function(raster_obj) {
|
||||
# Expected band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4)
|
||||
if (terra::nlyr(raster_obj) < 4) {
|
||||
stop("Raster has fewer than 4 bands. Cannot calculate CI.")
|
||||
}
|
||||
|
||||
green <- terra::subset(raster_obj, 2) # Green band (required for proper CI calculation)
|
||||
nir <- terra::subset(raster_obj, 4) # NIR
|
||||
|
||||
# *** CHLOROPHYLL INDEX = NIR / Green - 1 ***
|
||||
# This formula is sensitive to active chlorophyll content and ranges ~1-7
|
||||
# DO NOT use (NIR-Red)/(NIR+Red) - that is NDVI (Normalized Difference Vegetation Index)
|
||||
# NDVI ranges -1 to 1 and is different from Chlorophyll Index
|
||||
ci <- nir / green - 1
|
||||
|
||||
return(ci)
|
||||
}
|
||||
|
||||
#' Extract CI statistics by sub_field from a CI raster
|
||||
#'
|
||||
#' For a given field, masks the CI raster to each sub_field polygon
|
||||
#' and calculates statistics (mean, median, sd, min, max, count).
|
||||
#'
|
||||
#' @param ci_raster Raster object containing CI values
|
||||
#' @param field_boundaries_sf SF object containing field/sub_field polygons
|
||||
#' @param field_name Character string of the field name to process
|
||||
#' @return Data frame with field, sub_field, and CI statistics; NULL if field not found
|
||||
#'
|
||||
extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) {
|
||||
# NOTE: Per-field TIFFs are already cropped to field boundaries by Script 10
|
||||
# No need to mask again - just extract all valid pixels from the raster
|
||||
|
||||
# Extract ALL CI values (no masking needed for pre-cropped per-field TIFFs)
|
||||
ci_values <- terra::values(ci_raster, na.rm = TRUE)
|
||||
|
||||
if (length(ci_values) > 0) {
|
||||
result_row <- data.frame(
|
||||
field = field_name,
|
||||
sub_field = field_name, # Use field_name as sub_field since TIFF is already field-specific
|
||||
ci_mean = mean(ci_values, na.rm = TRUE),
|
||||
ci_median = median(ci_values, na.rm = TRUE),
|
||||
ci_sd = sd(ci_values, na.rm = TRUE),
|
||||
ci_min = min(ci_values, na.rm = TRUE),
|
||||
ci_max = max(ci_values, na.rm = TRUE),
|
||||
ci_count = length(ci_values),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
} else {
|
||||
result_row <- data.frame(
|
||||
field = field_name,
|
||||
sub_field = field_name,
|
||||
ci_mean = NA_real_,
|
||||
ci_median = NA_real_,
|
||||
ci_sd = NA_real_,
|
||||
ci_min = NA_real_,
|
||||
ci_max = NA_real_,
|
||||
ci_count = 0,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
}
|
||||
|
||||
return(result_row)
|
||||
}
|
||||
|
||||
#' Extract RDS from existing CI TIFF (Migration/Regeneration Mode)
|
||||
#'
|
||||
#' This function extracts CI statistics from an already-calculated CI TIFF
|
||||
#' without needing to recalculate from raw 4-band imagery.
|
||||
#' Used during migration when field_tiles_CI/ exists but daily_vals/{FIELD}/ is missing.
|
||||
#'
|
||||
#' @param ci_tiff_path Path to the 5-band TIFF containing CI as the 5th band
|
||||
#' @param output_rds_path Path where to save the output RDS file
|
||||
#' @param field_boundaries_sf SF object with field/sub_field polygons
|
||||
#' @param field_name Name of the field to extract
|
||||
#' @return The RDS data frame (invisibly) and saves to disk
|
||||
#'
|
||||
extract_rds_from_ci_tiff <- function(ci_tiff_path, output_rds_path, field_boundaries_sf, field_name) {
|
||||
tryCatch({
|
||||
# Load the 5-band TIFF
|
||||
raster_5band <- terra::rast(ci_tiff_path)
|
||||
|
||||
# Extract CI (5th band)
|
||||
# Assuming structure: [1]=R, [2]=G, [3]=B, [4]=NIR, [5]=CI
|
||||
ci_raster <- raster_5band[[5]]
|
||||
|
||||
# Extract CI statistics by sub_field
|
||||
ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field_name)
|
||||
|
||||
# Save RDS
|
||||
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
||||
saveRDS(ci_stats, output_rds_path)
|
||||
return(invisible(ci_stats))
|
||||
} else {
|
||||
safe_log(sprintf("No CI statistics extracted from %s", ci_tiff_path), "WARNING")
|
||||
return(invisible(NULL))
|
||||
}
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(sprintf("Error extracting RDS from CI TIFF: %s", e$message), "ERROR")
|
||||
return(invisible(NULL))
|
||||
})
|
||||
}
|
||||
|
||||
#' Regenerate ALL missing RDS files from existing CI TIFFs (Comprehensive Migration Mode)
|
||||
#'
|
||||
#' This function processes ALL dates in field_tiles_CI/ and extracts RDS for any missing daily_vals/
|
||||
#' files. No date window filtering - processes the entire historical archive.
|
||||
#'
|
||||
#' Used for one-time migration of old projects where field_tiles_CI/ is populated but daily_vals/
|
||||
#' RDS files are missing or incomplete.
|
||||
#'
|
||||
#' @param field_tiles_ci_dir Path to field_tiles_CI/ (input: pre-calculated CI TIFFs)
|
||||
#' @param daily_vals_dir Path to daily_vals/ (output: RDS statistics files)
|
||||
#' @param field_boundaries_sf SF object with field/sub_field polygons
|
||||
#' @param fields Vector of field names to process
|
||||
#'
|
||||
#' @return List with summary stats: list(total_processed=N, total_skipped=M, total_errors=K)
|
||||
#'
|
||||
regenerate_all_missing_rds <- function(field_tiles_ci_dir, daily_vals_dir, field_boundaries_sf, fields) {
|
||||
safe_log("\n========================================")
|
||||
safe_log("MIGRATION MODE: REGENERATING ALL MISSING RDS")
|
||||
safe_log("Processing ALL dates in field_tiles_CI/")
|
||||
safe_log("========================================")
|
||||
|
||||
total_processed <- 0
|
||||
total_skipped <- 0
|
||||
total_errors <- 0
|
||||
|
||||
# Iterate through each field
|
||||
for (field in fields) {
|
||||
field_ci_path <- file.path(field_tiles_ci_dir, field)
|
||||
field_daily_vals_path <- file.path(daily_vals_dir, field)
|
||||
|
||||
# Skip if field directory doesn't exist
|
||||
if (!dir.exists(field_ci_path)) {
|
||||
safe_log(sprintf(" Field %s: field_tiles_CI not found (skipping)", field))
|
||||
continue
|
||||
}
|
||||
|
||||
# Create output directory for RDS
|
||||
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
|
||||
|
||||
# Find ALL CI TIFFs for this field (no date filtering)
|
||||
ci_tiff_files <- list.files(
|
||||
path = field_ci_path,
|
||||
pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$",
|
||||
full.names = TRUE
|
||||
)
|
||||
|
||||
if (length(ci_tiff_files) == 0) {
|
||||
safe_log(sprintf(" Field %s: No CI TIFFs found (skipping)", field))
|
||||
next
|
||||
}
|
||||
|
||||
safe_log(sprintf(" Field %s: Found %d CI TIFFs to process", field, length(ci_tiff_files)))
|
||||
|
||||
# Process each CI TIFF
|
||||
for (ci_tiff in ci_tiff_files) {
|
||||
date_str <- tools::file_path_sans_ext(basename(ci_tiff))
|
||||
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
||||
|
||||
# Skip if RDS already exists
|
||||
if (file.exists(output_rds)) {
|
||||
total_skipped <- total_skipped + 1
|
||||
next
|
||||
}
|
||||
|
||||
# Extract RDS from CI TIFF
|
||||
tryCatch({
|
||||
extract_rds_from_ci_tiff(ci_tiff, output_rds, field_boundaries_sf, field)
|
||||
safe_log(sprintf(" %s: ✓ RDS extracted", date_str))
|
||||
total_processed <- total_processed + 1
|
||||
}, error = function(e) {
|
||||
safe_log(sprintf(" %s: ✗ Error - %s", date_str, e$message), "ERROR")
|
||||
total_errors <<- total_errors + 1
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
safe_log(sprintf("\nMigration complete: processed %d, skipped %d, errors %d",
|
||||
total_processed, total_skipped, total_errors))
|
||||
|
||||
return(list(
|
||||
total_processed = total_processed,
|
||||
total_skipped = total_skipped,
|
||||
total_errors = total_errors
|
||||
))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,22 +1,62 @@
|
|||
# 02b_CONVERT_CI_RDS_TO_CSV.R
|
||||
# ============================
|
||||
# Convert combined_CI_data.rds (output of script 02) to CSV format for Python
|
||||
# This script runs AFTER script 02 (CI extraction) and creates a CSV that Python
|
||||
# can use for harvest date detection WITHOUT requiring the 'model' column (which
|
||||
# comes from script 03 after interpolation and harvest dates are known).
|
||||
# ============================================================================
|
||||
# SCRIPT 21: Convert CI RDS to CSV (Python Compatibility Format)
|
||||
# ============================================================================
|
||||
# PURPOSE:
|
||||
# Convert consolidated CI data from R's wide format (RDS) to Python-compatible
|
||||
# long format (CSV). Prepares per-field CI time series for harvest detection
|
||||
# models and Python ML workflows without requiring interpolated/modeled values.
|
||||
#
|
||||
# Usage: Rscript 02b_convert_ci_rds_to_csv.R [project_dir]
|
||||
# - project_dir: Project directory name (e.g., "esa", "chemba", "angata")
|
||||
# INPUT DATA:
|
||||
# - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||
# - Format: RDS (interpolated growth model data from Script 30)
|
||||
# - Requirement: Script 30 must have completed growth model interpolation
|
||||
#
|
||||
# Output: CSV file at laravel_app/storage/app/{project_dir}/Data/extracted_ci/cumulative_vals/ci_data_for_python.csv
|
||||
# Columns: field, sub_field, Date, FitData, DOY, value (alias for FitData)
|
||||
# OUTPUT DATA:
|
||||
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||
# - Format: CSV (long format)
|
||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||
#
|
||||
# USAGE:
|
||||
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
||||
#
|
||||
# Example (Windows PowerShell):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||
#
|
||||
# PARAMETERS:
|
||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# CLIENT TYPES:
|
||||
# - cane_supply (ANGATA): Yes - data export
|
||||
# - agronomic_support (AURA): Yes - Python ML integration
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - Packages: tidyverse, lubridate, zoo
|
||||
# - Utils files: parameters_project.R
|
||||
# - Input data: combined_CI_data.rds from Script 20
|
||||
# - Data directories: extracted_ci/cumulative_vals/
|
||||
#
|
||||
# NOTES:
|
||||
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||
# - Handles both wide format and long format inputs from growth model
|
||||
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
||||
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||
# - Exports complete growth curves with interpolated values for ML training
|
||||
#
|
||||
# RELATED ISSUES:
|
||||
# SC-112: Utilities restructuring
|
||||
# SC-108: Core pipeline improvements
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
suppressPackageStartupMessages({
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
library(here)
|
||||
# File path handling
|
||||
library(here) # For relative path resolution (platform-independent file paths)
|
||||
|
||||
# Data manipulation
|
||||
library(tidyverse) # For dplyr, readr (data wrangling and CSV I/O)
|
||||
library(lubridate) # For date/time operations (DOY calculation)
|
||||
library(zoo) # For zoo objects (gap filling, rolling operations)
|
||||
})
|
||||
|
||||
# ============================================================================
|
||||
|
|
@ -140,7 +180,7 @@ main <- function() {
|
|||
|
||||
cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir))
|
||||
|
||||
# Initialize project configuration
|
||||
# Initialize project configuration and centralized paths
|
||||
tryCatch({
|
||||
source("parameters_project.R")
|
||||
}, error = function(e) {
|
||||
|
|
@ -152,49 +192,63 @@ main <- function() {
|
|||
})
|
||||
})
|
||||
|
||||
# Define paths
|
||||
ci_data_source_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
|
||||
ci_data_output_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "ci_data_for_python")
|
||||
# Load centralized path structure (creates all directories automatically)
|
||||
paths <- setup_project_directories(project_dir)
|
||||
|
||||
# Create output directory if it doesn't exist (for new projects)
|
||||
if (!dir.exists(ci_data_output_dir)) {
|
||||
dir.create(ci_data_output_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
cat(sprintf("✓ Created output directory: %s\n", ci_data_output_dir))
|
||||
}
|
||||
# Use centralized paths (no need for dir.create - already handled)
|
||||
ci_data_source_dir <- paths$cumulative_ci_vals_dir
|
||||
ci_data_output_dir <- paths$ci_for_python_dir
|
||||
|
||||
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds")
|
||||
# Try to load interpolated growth model data from Script 30
|
||||
input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
||||
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
|
||||
|
||||
# Check if input file exists
|
||||
if (!file.exists(input_file)) {
|
||||
stop(paste("Input file not found:", input_file))
|
||||
stop(paste("Input file not found:", input_file,
|
||||
"\nScript 30 (growth model) must be run before Script 21."))
|
||||
}
|
||||
|
||||
cat(sprintf("Loading: %s\n", input_file))
|
||||
|
||||
# Load RDS file
|
||||
ci_data_wide <- readRDS(input_file) %>%
|
||||
# Load RDS file (from Script 30 - already in long format with interpolated values)
|
||||
ci_data <- readRDS(input_file) %>%
|
||||
as_tibble()
|
||||
|
||||
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide)))
|
||||
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n"))
|
||||
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
|
||||
cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
|
||||
cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
|
||||
|
||||
# Step 1: Convert from WIDE to LONG format
|
||||
cat("\nStep 1: Converting from wide to long format...\n")
|
||||
ci_data_long <- wide_to_long_ci_data(ci_data_wide)
|
||||
# Check format and prepare for export
|
||||
# If it's already in long format (from Script 30), use as-is
|
||||
# Otherwise, convert from wide to long
|
||||
if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) {
|
||||
cat(" Detected: LONG format (from growth model)\n")
|
||||
ci_data_long <- ci_data
|
||||
} else {
|
||||
cat(" Detected: WIDE format - converting to long...\n")
|
||||
ci_data_long <- wide_to_long_ci_data(ci_data)
|
||||
}
|
||||
|
||||
# Step 2: Create complete daily sequences with interpolation
|
||||
# Step 1: Ensure Date column exists and is properly formatted
|
||||
ci_data_long <- ci_data_long %>%
|
||||
mutate(
|
||||
Date = as.Date(Date)
|
||||
)
|
||||
|
||||
# Step 2: If interpolated values already present, use them; otherwise create interpolated sequences
|
||||
if ("value" %in% names(ci_data_long)) {
|
||||
# Already has interpolated values from Script 30
|
||||
cat("\nStep 2: Using interpolated values from growth model...\n")
|
||||
ci_data_python <- ci_data_long
|
||||
} else {
|
||||
# Create interpolated daily sequences
|
||||
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
|
||||
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
|
||||
|
||||
# Step 3: Validate output
|
||||
cat("\nStep 3: Validating output...")
|
||||
validate_conversion_output(ci_data_python)
|
||||
}
|
||||
|
||||
# Step 4: Save to CSV
|
||||
cat(sprintf("\nStep 4: Saving to CSV...\n"))
|
||||
cat(sprintf(" Output: %s\n", output_file))
|
||||
cat(sprintf("\nStep 4: Saving to CSV...\\n"))
|
||||
cat(sprintf(" Output: %s\\n", output_file))
|
||||
write_csv(ci_data_python, output_file)
|
||||
|
||||
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
|
||||
|
|
|
|||
|
|
@ -4,57 +4,143 @@
|
|||
# ===================
|
||||
# Utility functions for growth model interpolation and manipulation.
|
||||
# These functions support the creation of continuous growth models from point measurements.
|
||||
#
|
||||
# PERFORMANCE OPTIMIZATION:
|
||||
# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr()
|
||||
# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields)
|
||||
# - Dynamic CPU detection: Allocates workers based on available cores
|
||||
# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support
|
||||
|
||||
#' Safe logging function that works whether log_message exists or not
|
||||
#' Load and prepare the combined CI data (Per-Field Architecture)
|
||||
#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges)
|
||||
#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr()
|
||||
#'
|
||||
#' @param message The message to log
|
||||
#' @param level The log level (default: "INFO")
|
||||
#' @return NULL (used for side effects)
|
||||
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
|
||||
#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup)
|
||||
#' @return Long-format dataframe with CI values by date and field
|
||||
#'
|
||||
safe_log <- function(message, level = "INFO") {
|
||||
if (exists("log_message")) {
|
||||
log_message(message, level)
|
||||
load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
|
||||
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
|
||||
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
||||
|
||||
if (!dir.exists(daily_vals_dir)) {
|
||||
stop(paste("Daily values directory not found:", daily_vals_dir))
|
||||
}
|
||||
|
||||
safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
|
||||
|
||||
# OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates
|
||||
date_filter_min <- NULL
|
||||
date_filter_max <- NULL
|
||||
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
|
||||
date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE)
|
||||
date_filter_max <- max(harvesting_data$season_end, na.rm = TRUE)
|
||||
safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s",
|
||||
format(date_filter_min, "%Y-%m-%d"),
|
||||
format(date_filter_max, "%Y-%m-%d")))
|
||||
}
|
||||
|
||||
# Find all daily RDS files recursively (per-field structure)
|
||||
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
|
||||
all_daily_files <- list.files(
|
||||
path = daily_vals_dir,
|
||||
pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$", # Only YYYY-MM-DD.rds format
|
||||
full.names = TRUE,
|
||||
recursive = TRUE
|
||||
)
|
||||
|
||||
# Further filter: only keep files that are in a subdirectory (per-field structure)
|
||||
# Exclude legacy files at the root level like "extracted_2024-02-29_whole_field.rds"
|
||||
all_daily_files <- all_daily_files[basename(dirname(all_daily_files)) != "daily_vals"]
|
||||
|
||||
if (length(all_daily_files) == 0) {
|
||||
stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
|
||||
}
|
||||
|
||||
safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files)))
|
||||
|
||||
# OPTIMIZATION: Filter files by filename date BEFORE parallel loading
|
||||
# Skip files outside harvest season (can save 60-80% of I/O on large datasets)
|
||||
if (!is.null(date_filter_min) && !is.null(date_filter_max)) {
|
||||
all_daily_files <- all_daily_files[
|
||||
{
|
||||
dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d")
|
||||
!is.na(dates) & dates >= date_filter_min & dates <= date_filter_max
|
||||
}
|
||||
]
|
||||
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
|
||||
}
|
||||
|
||||
# Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
|
||||
# Automatically detect available cores and limit to reasonable number
|
||||
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
|
||||
future::plan(strategy = future::multisession, workers = n_cores)
|
||||
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
|
||||
|
||||
# Parallel file reading: future_map_dfr processes each file in parallel
|
||||
# Returns combined dataframe directly (no need to rbind)
|
||||
combined_long <- furrr::future_map_dfr(
|
||||
all_daily_files,
|
||||
.progress = TRUE,
|
||||
.options = furrr::furrr_options(seed = TRUE),
|
||||
function(file) {
|
||||
# Extract date from filename: {YYYY-MM-DD}.rds
|
||||
filename <- basename(file)
|
||||
date_str <- tools::file_path_sans_ext(filename)
|
||||
|
||||
# Parse date
|
||||
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
|
||||
parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
|
||||
} else {
|
||||
if (level %in% c("ERROR", "WARNING")) {
|
||||
warning(message)
|
||||
} else {
|
||||
message(message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#' Load and prepare the combined CI data
|
||||
#'
|
||||
#' @param data_dir Directory containing the combined CI data
|
||||
#' @return Long-format dataframe with CI values by date
|
||||
#'
|
||||
load_combined_ci_data <- function(data_dir) {
|
||||
file_path <- here::here(data_dir, "combined_CI_data.rds")
|
||||
|
||||
if (!file.exists(file_path)) {
|
||||
stop(paste("Combined CI data file not found:", file_path))
|
||||
return(data.frame()) # Return empty dataframe if parse fails
|
||||
}
|
||||
|
||||
safe_log(paste("Loading CI data from:", file_path))
|
||||
if (is.na(parsed_date)) {
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
# Load and transform the data to long format
|
||||
pivot_stats <- readRDS(file_path) %>%
|
||||
dplyr::ungroup() %>%
|
||||
dplyr::group_by(field, sub_field) %>%
|
||||
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
|
||||
# Read RDS file
|
||||
tryCatch({
|
||||
rds_data <- readRDS(file)
|
||||
|
||||
pivot_stats_long <- pivot_stats %>%
|
||||
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
|
||||
dplyr::mutate(
|
||||
Date = lubridate::ymd(Date),
|
||||
value = as.numeric(value)
|
||||
) %>%
|
||||
tidyr::drop_na(c("value", "Date")) %>%
|
||||
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
|
||||
dplyr::filter(!is.infinite(value)) %>%
|
||||
dplyr::distinct()
|
||||
if (is.null(rds_data) || nrow(rds_data) == 0) {
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
|
||||
# Add date column to the data
|
||||
rds_data %>%
|
||||
dplyr::mutate(Date = parsed_date)
|
||||
|
||||
}, error = function(e) {
|
||||
return(data.frame()) # Return empty dataframe on error
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
# Return to sequential processing to avoid nested parallelism
|
||||
future::plan(future::sequential)
|
||||
|
||||
if (nrow(combined_long) == 0) {
|
||||
safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
# OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets)
|
||||
# Reshape to long format using ci_mean as the main CI value
|
||||
DT <- data.table::as.data.table(combined_long)
|
||||
DT <- DT[, .(field, sub_field, ci_mean, Date)]
|
||||
DT[, c("value") := list(as.numeric(ci_mean))]
|
||||
DT[, ci_mean := NULL]
|
||||
|
||||
# Fast filtering without .distinct() (which is slow on large datasets)
|
||||
# Keep rows where Date is valid, field/sub_field exist, and value is finite
|
||||
DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)]
|
||||
|
||||
# Convert back to tibble for compatibility with rest of pipeline
|
||||
pivot_stats_long <- dplyr::as_tibble(DT)
|
||||
|
||||
safe_log(sprintf("Loaded %d CI data points from %d daily files",
|
||||
nrow(pivot_stats_long), length(all_daily_files)))
|
||||
|
||||
return(pivot_stats_long)
|
||||
}
|
||||
|
|
@ -65,15 +151,16 @@ load_combined_ci_data <- function(data_dir) {
|
|||
#' @param harvesting_data Dataframe with harvesting information
|
||||
#' @param field_CI_data Dataframe with CI measurements
|
||||
#' @param season Year of the growing season
|
||||
#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration.
|
||||
#' @return Dataframe with interpolated daily CI values
|
||||
#'
|
||||
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) {
|
||||
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) {
|
||||
# Filter harvesting data for the given season and field name
|
||||
filtered_harvesting_data <- harvesting_data %>%
|
||||
dplyr::filter(year == season, sub_field == field_name)
|
||||
|
||||
if (nrow(filtered_harvesting_data) == 0) {
|
||||
safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
|
||||
if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
|
|
@ -83,7 +170,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
|||
|
||||
# Return an empty data frame if no CI data is found
|
||||
if (nrow(filtered_field_CI_data) == 0) {
|
||||
safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
|
||||
if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
|
|
@ -107,12 +194,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
|||
|
||||
# If CI is empty after filtering, return an empty dataframe
|
||||
if (nrow(CI) == 0) {
|
||||
if (verbose) {
|
||||
safe_log(paste0("No CI data within season dates for field: ", field_name,
|
||||
" (Season: ", season, ", dates: ",
|
||||
format(season_start, "%Y-%m-%d"), " to ",
|
||||
format(season_end, "%Y-%m-%d"),
|
||||
"). Available CI data range: ", ci_date_range),
|
||||
"WARNING")
|
||||
}
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
|
|
@ -125,25 +214,23 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
|||
subField = field_name
|
||||
)
|
||||
|
||||
# Log successful interpolation
|
||||
safe_log(paste0("Successfully interpolated CI data for field: ", field_name,
|
||||
" (Season: ", season, ", dates: ",
|
||||
format(season_start, "%Y-%m-%d"), " to ",
|
||||
format(season_end, "%Y-%m-%d"),
|
||||
"). ", nrow(CI), " data points created."))
|
||||
|
||||
# Return data with success status
|
||||
return(CI)
|
||||
}, error = function(e) {
|
||||
# Return empty dataframe on error (will be tracked separately)
|
||||
if (verbose) {
|
||||
safe_log(paste0("Error interpolating CI data for field ", field_name,
|
||||
" in season ", season,
|
||||
" (", format(season_start, "%Y-%m-%d"), " to ",
|
||||
format(season_end, "%Y-%m-%d"),
|
||||
"): ", e$message), "ERROR")
|
||||
}
|
||||
return(data.frame())
|
||||
})
|
||||
}
|
||||
|
||||
#' Generate interpolated CI data for all fields and seasons
|
||||
#' PARALLELIZE: Processes fields in parallel using furrr::future_map_df()
|
||||
#'
|
||||
#' @param years Vector of years to process
|
||||
#' @param harvesting_data Dataframe with harvesting information
|
||||
|
|
@ -153,17 +240,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
|||
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
||||
safe_log("Starting CI data interpolation for all fields")
|
||||
|
||||
# Track failed fields for end-of-run summary
|
||||
failed_fields <- list()
|
||||
total_fields <- 0
|
||||
successful_fields <- 0
|
||||
|
||||
# Process each year
|
||||
result <- purrr::map_df(years, function(yr) {
|
||||
safe_log(paste("Processing year:", yr))
|
||||
|
||||
# Get the fields harvested in this year with valid season start dates
|
||||
sub_fields <- harvesting_data %>%
|
||||
dplyr::filter(year == yr, !is.na(season_start)) %>%
|
||||
dplyr::pull(sub_field)
|
||||
|
||||
if (length(sub_fields) == 0) {
|
||||
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
|
|
@ -172,24 +261,74 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
|||
purrr::keep(~ any(ci_data$sub_field == .x))
|
||||
|
||||
if (length(valid_sub_fields) == 0) {
|
||||
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
|
||||
# Extract and interpolate data for each valid field
|
||||
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
|
||||
total_fields <<- total_fields + length(valid_sub_fields)
|
||||
safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
|
||||
|
||||
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
|
||||
# Set up parallel future plan for field interpolation
|
||||
# Allocate 1 core per ~100 fields (with minimum 2 cores)
|
||||
n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
|
||||
future::plan(strategy = future::multisession, workers = n_cores)
|
||||
|
||||
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
|
||||
result_list <- furrr::future_map(
|
||||
valid_sub_fields,
|
||||
.progress = TRUE,
|
||||
.options = furrr::furrr_options(seed = TRUE),
|
||||
function(field) {
|
||||
# Call with verbose=FALSE to suppress warnings during parallel iteration
|
||||
extract_CI_data(field,
|
||||
harvesting_data = harvesting_data,
|
||||
field_CI_data = ci_data,
|
||||
season = yr)) %>%
|
||||
purrr::list_rbind()
|
||||
season = yr,
|
||||
verbose = FALSE)
|
||||
}
|
||||
)
|
||||
|
||||
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
|
||||
return(result)
|
||||
# Return to sequential processing
|
||||
future::plan(future::sequential)
|
||||
|
||||
# Process results and tracking
|
||||
for (i in seq_along(result_list)) {
|
||||
field_result <- result_list[[i]]
|
||||
field_name <- valid_sub_fields[i]
|
||||
|
||||
if (nrow(field_result) > 0) {
|
||||
successful_fields <<- successful_fields + 1
|
||||
} else {
|
||||
failed_fields[[length(failed_fields) + 1]] <<- list(
|
||||
field = field_name,
|
||||
season = yr,
|
||||
reason = "Unable to generate interpolated data"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
# Combine all results for this year
|
||||
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
|
||||
if (length(result_list) > 0) {
|
||||
purrr::list_rbind(result_list)
|
||||
} else {
|
||||
data.frame()
|
||||
}
|
||||
})
|
||||
|
||||
safe_log(paste("Total interpolated data points:", nrow(result)))
|
||||
# Print summary
|
||||
safe_log(sprintf("\n=== Interpolation Summary ==="))
|
||||
safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields))
|
||||
|
||||
if (length(failed_fields) > 0) {
|
||||
safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields)))
|
||||
for (failure in failed_fields) {
|
||||
safe_log(sprintf(" - Field %s (Season %d): %s",
|
||||
failure$field, failure$season, failure$reason), "WARNING")
|
||||
}
|
||||
}
|
||||
|
||||
safe_log(sprintf("Total interpolated data points: %d", nrow(result)))
|
||||
|
||||
return(result)
|
||||
}
|
||||
|
||||
|
|
@ -222,11 +361,19 @@ calculate_growth_metrics <- function(interpolated_data) {
|
|||
#' @return Path to the saved file
|
||||
#'
|
||||
save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") {
|
||||
# Validate input
|
||||
if (is.null(output_dir) || !is.character(output_dir) || length(output_dir) == 0) {
|
||||
stop("output_dir must be a non-empty character string")
|
||||
}
|
||||
|
||||
# Normalize path separators for Windows compatibility
|
||||
output_dir <- normalizePath(output_dir, winslash = "/", mustWork = FALSE)
|
||||
|
||||
# Create output directory if it doesn't exist
|
||||
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
|
||||
# Create full file path
|
||||
file_path <- here::here(output_dir, file_name)
|
||||
# Create full file path using file.path (more robust than here::here for absolute paths)
|
||||
file_path <- file.path(output_dir, file_name)
|
||||
|
||||
# Save the data
|
||||
saveRDS(data, file_path)
|
||||
|
|
|
|||
|
|
@ -1,71 +1,142 @@
|
|||
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\interpolate_growth_model.R
|
||||
# ============================================================================
|
||||
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
|
||||
# ============================================================================
|
||||
# PURPOSE:
|
||||
# Interpolate Canopy Index (CI) values across time to create continuous
|
||||
# growth curves. Fills gaps in measurement dates, applies smoothing via
|
||||
# LOESS, and generates daily CI estimates and cumulative statistics for
|
||||
# each field. Enables downstream yield prediction and trend analysis.
|
||||
#
|
||||
# INTERPOLATE_GROWTH_MODEL.R
|
||||
# =========================
|
||||
# This script interpolates CI (Chlorophyll Index) values between measurement dates
|
||||
# to create a continuous growth model. It generates daily values and cumulative
|
||||
# CI statistics for each field.
|
||||
# INPUT DATA:
|
||||
# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
|
||||
# - Format: RDS (wide format: fields × dates with CI values)
|
||||
# - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
|
||||
#
|
||||
# Usage: Rscript interpolate_growth_model.R [project_dir]
|
||||
# - project_dir: Project directory name (e.g., "chemba")
|
||||
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata
|
||||
# OUTPUT DATA:
|
||||
# - Destination: laravel_app/storage/app/{project}/interpolated_ci/
|
||||
# - Format: RDS files per field (daily CI estimates)
|
||||
# - Also exports: Growth model curves as RDS (cumulative CI, daily values)
|
||||
#
|
||||
# USAGE:
|
||||
# Rscript 30_interpolate_growth_model.R [project]
|
||||
#
|
||||
# Example (Windows PowerShell):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||
#
|
||||
# PARAMETERS:
|
||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# CLIENT TYPES:
|
||||
# - cane_supply (ANGATA): Yes - core growth monitoring
|
||||
# - agronomic_support (AURA): Yes - field health trend analysis
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - Packages: tidyverse, lubridate
|
||||
# - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
|
||||
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
|
||||
# - Input data: combined_CI_data.rds from Script 20
|
||||
# - Data directories: interpolated_ci/ (created if missing)
|
||||
#
|
||||
# NOTES:
|
||||
# - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
|
||||
# - Gap-filling: Assumes continuous growth between measurements; skips clouds
|
||||
# - Cumulative CI: Sum of daily interpolated values from planting to current date
|
||||
# - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
|
||||
# - Critical for 8-week CV trend calculation and 4-week growth categorization
|
||||
#
|
||||
# RELATED ISSUES:
|
||||
# SC-112: Utilities restructuring
|
||||
# SC-108: Core pipeline improvements
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
# 1. Load required packages
|
||||
# -----------------------
|
||||
suppressPackageStartupMessages({
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(here)
|
||||
# File path handling
|
||||
library(here) # For relative path resolution (platform-independent file paths)
|
||||
|
||||
# Data manipulation
|
||||
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
|
||||
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
|
||||
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
|
||||
|
||||
# Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
|
||||
library(future) # For setting up parallel execution plans
|
||||
library(furrr) # For future_map_dfr (parallel file I/O and field processing)
|
||||
library(parallel) # For detectCores (automatic CPU detection)
|
||||
library(data.table) # For fast filtering on large datasets
|
||||
})
|
||||
|
||||
# 2. Main function to handle interpolation
|
||||
# -------------------------------------
|
||||
main <- function() {
|
||||
# Process command line arguments
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
# =============================================================================
|
||||
# MAIN PROCESSING FUNCTION
|
||||
# =============================================================================
|
||||
|
||||
# Get project directory from arguments or use default
|
||||
if (length(args) >= 1 && !is.na(args[1])) {
|
||||
project_dir <- as.character(args[1])
|
||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
||||
} else {
|
||||
project_dir <- "esa"
|
||||
message("No project_dir provided. Using default:", project_dir)
|
||||
main <- function() {
|
||||
# STEP 1: Set working directory to project root (smartcane/)
|
||||
# This ensures all relative paths resolve correctly
|
||||
if (basename(getwd()) == "r_app") {
|
||||
setwd("..")
|
||||
}
|
||||
|
||||
# Make project_dir available globally so parameters_project.R can use it
|
||||
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
|
||||
# Parse command-line arguments FIRST
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||
|
||||
# Make project_dir available globally for parameters_project.R
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
|
||||
ci_extraction_script <- TRUE
|
||||
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
|
||||
|
||||
# Initialize project configuration and load utility functions
|
||||
# Load parameters_project.R (provides setup_project_directories, etc.)
|
||||
tryCatch({
|
||||
source("parameters_project.R")
|
||||
source("30_growth_model_utils.R")
|
||||
source("r_app/parameters_project.R")
|
||||
}, error = function(e) {
|
||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "30_growth_model_utils.R"))
|
||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
||||
})
|
||||
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
|
||||
stop(e)
|
||||
})
|
||||
|
||||
log_message("Starting CI growth model interpolation")
|
||||
# Load growth model utilities
|
||||
tryCatch({
|
||||
source("r_app/30_growth_model_utils.R")
|
||||
}, error = function(e) {
|
||||
cat(sprintf("Error loading 30_growth_model_utils.R: %s\n", e$message))
|
||||
stop(e)
|
||||
})
|
||||
|
||||
# STEP 3: Now all utilities are loaded, proceed with script logic
|
||||
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
|
||||
safe_log(sprintf("Project: %s", project_dir))
|
||||
|
||||
# Set up directory paths from parameters
|
||||
setup <- setup_project_directories(project_dir)
|
||||
|
||||
# For per-field architecture: read from daily_ci_vals_dir (Script 20 per-field output)
|
||||
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||
safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))
|
||||
|
||||
safe_log("Starting CI growth model interpolation")
|
||||
|
||||
# Set up data directory paths
|
||||
data_dir <- setup$data_dir
|
||||
|
||||
# Load and process the data
|
||||
tryCatch({
|
||||
# Load the combined CI data
|
||||
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
|
||||
# Load the combined CI data (created by Script 20 per-field)
|
||||
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
||||
# OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files)
|
||||
harvesting_data <- tryCatch({
|
||||
load_harvesting_data(data_dir)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING")
|
||||
NULL
|
||||
})
|
||||
|
||||
# Load CI data with date range pre-filtering
|
||||
CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)
|
||||
|
||||
# Validate harvesting data
|
||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||
safe_log("No harvesting data available", "ERROR")
|
||||
stop("No harvesting data available")
|
||||
}
|
||||
|
||||
|
|
@ -75,7 +146,7 @@ main <- function() {
|
|||
distinct(year) %>%
|
||||
pull(year)
|
||||
|
||||
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||
safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||
|
||||
# Generate interpolated CI data for each year and field
|
||||
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
||||
|
|
@ -89,20 +160,20 @@ main <- function() {
|
|||
# Add daily and cumulative metrics
|
||||
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
||||
|
||||
# Save the processed data
|
||||
# Save the processed data to cumulative_vals directory
|
||||
save_growth_model(
|
||||
CI_all_with_metrics,
|
||||
cumulative_CI_vals_dir,
|
||||
setup$cumulative_ci_vals_dir,
|
||||
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||
)
|
||||
} else {
|
||||
log_message("No CI data was generated after interpolation", level = "WARNING")
|
||||
safe_log("No CI data was generated after interpolation", "WARNING")
|
||||
}
|
||||
|
||||
log_message("Growth model interpolation completed successfully")
|
||||
safe_log("Growth model interpolation completed successfully")
|
||||
|
||||
}, error = function(e) {
|
||||
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
|
||||
safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
|
||||
stop(e$message)
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,215 +0,0 @@
|
|||
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\mosaic_creation.R
|
||||
#
|
||||
# MOSAIC_CREATION.R
|
||||
# ===============
|
||||
# This script creates weekly mosaics from daily satellite imagery.
|
||||
# It handles command-line arguments and initiates the mosaic creation process.
|
||||
#
|
||||
# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size]
|
||||
# - end_date: End date for processing (YYYY-MM-DD format)
|
||||
# - offset: Number of days to look back from end_date
|
||||
# - project_dir: Project directory name (e.g., "chemba")
|
||||
# - file_name: Optional custom output file name
|
||||
# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE)
|
||||
# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE)
|
||||
#
|
||||
# Examples:
|
||||
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata
|
||||
#
|
||||
|
||||
# 1. Load required packages
|
||||
# -----------------------
|
||||
suppressPackageStartupMessages({
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(here)
|
||||
})
|
||||
|
||||
# 2. Process command line arguments and run mosaic creation
|
||||
# ------------------------------------------------------
|
||||
main <- function() {
|
||||
# Capture command line arguments
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
# Process project_dir argument with default
|
||||
if (length(args) >= 3 && !is.na(args[3])) {
|
||||
project_dir <- as.character(args[3])
|
||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
||||
} else {
|
||||
# Default project directory
|
||||
project_dir <- "angata"
|
||||
message("No project_dir provided. Using default:", project_dir)
|
||||
}
|
||||
|
||||
# Make project_dir available globally so parameters_project.R can use it
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
# Process end_date argument with default
|
||||
if (length(args) >= 1 && !is.na(args[1])) {
|
||||
end_date <- as.Date(args[1])
|
||||
if (is.na(end_date)) {
|
||||
message("Invalid end_date provided. Using current date.")
|
||||
end_date <- Sys.Date()
|
||||
}
|
||||
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
||||
end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
|
||||
} else {
|
||||
# Default to current date if no argument is provided
|
||||
end_date <- Sys.Date()
|
||||
message("No end_date provided. Using current date: ", format(end_date))
|
||||
}
|
||||
|
||||
# Process offset argument with default
|
||||
if (length(args) >= 2 && !is.na(args[2])) {
|
||||
offset <- as.numeric(args[2])
|
||||
if (is.na(offset) || offset <= 0) {
|
||||
message("Invalid offset provided. Using default (7 days).")
|
||||
offset <- 7
|
||||
}
|
||||
} else {
|
||||
# Default to 7 days if no argument is provided
|
||||
offset <- 7
|
||||
message("No offset provided. Using default:", offset, "days")
|
||||
}
|
||||
|
||||
# 3. Initialize project configuration
|
||||
# --------------------------------
|
||||
|
||||
# Detect which data source directory exists (merged_tif or merged_tif_8b)
|
||||
laravel_storage <- here::here("laravel_app/storage/app", project_dir)
|
||||
data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) {
|
||||
message("Detected data source: merged_tif_8b (8-band optimized)")
|
||||
"merged_tif_8b"
|
||||
} else if (dir.exists(file.path(laravel_storage, "merged_tif"))) {
|
||||
message("Detected data source: merged_tif (legacy 4-band)")
|
||||
"merged_tif"
|
||||
} else {
|
||||
message("Warning: No data source found. Using default: merged_tif_8b")
|
||||
"merged_tif_8b"
|
||||
}
|
||||
|
||||
# Set global data_source for parameters_project.R
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
tryCatch({
|
||||
source("parameters_project.R")
|
||||
source("40_mosaic_creation_utils.R")
|
||||
safe_log(paste("Successfully sourced files from default directory."))
|
||||
}, error = function(e) {
|
||||
message("Note: Could not open files from default directory (expected on some systems)")
|
||||
message("Attempting to source from 'r_app' directory instead...")
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "40_mosaic_creation_utils.R"))
|
||||
message("✓ Successfully sourced files from 'r_app' directory")
|
||||
}, error = function(e) {
|
||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
||||
})
|
||||
})
|
||||
|
||||
# 4. Generate date range for processing
|
||||
# ---------------------------------
|
||||
dates <- date_list(end_date, offset)
|
||||
safe_log(paste("Processing data for week", dates$week, "of", dates$year))
|
||||
|
||||
# Create output filename
|
||||
file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
|
||||
as.character(args[4])
|
||||
} else {
|
||||
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
|
||||
}
|
||||
|
||||
safe_log(paste("Output will be saved as:", file_name_tif))
|
||||
|
||||
# 5. Create weekly mosaics - route based on project tile detection
|
||||
# ---------------------------------------------------------------
|
||||
# The use_tile_mosaic flag is auto-detected by parameters_project.R
|
||||
# based on whether tiles exist in merged_final_tif/
|
||||
|
||||
if (!exists("use_tile_mosaic")) {
|
||||
# Fallback detection if flag not set (shouldn't happen)
|
||||
merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
|
||||
tile_detection <- detect_mosaic_mode(merged_final_dir)
|
||||
use_tile_mosaic <- tile_detection$has_tiles
|
||||
}
|
||||
|
||||
if (use_tile_mosaic) {
|
||||
# TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
|
||||
# This is used for projects like Angata with large ROIs requiring spatial partitioning
|
||||
# Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20)
|
||||
tryCatch({
|
||||
safe_log("Starting per-tile mosaic creation (tile-based approach)...")
|
||||
|
||||
# Detect grid size from merged_final_tif folder structure
|
||||
# Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc.
|
||||
merged_final_base <- file.path(laravel_storage, "merged_final_tif")
|
||||
grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE)
|
||||
# Look for grid size patterns like "5x5", "10x10", "20x20"
|
||||
grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
|
||||
|
||||
if (length(grid_patterns) == 0) {
|
||||
stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)")
|
||||
}
|
||||
|
||||
grid_size <- grid_patterns[1] # Use first grid size found
|
||||
safe_log(paste("Detected grid size:", grid_size))
|
||||
|
||||
# Point to the grid-specific merged_final_tif directory
|
||||
merged_final_with_grid <- file.path(merged_final_base, grid_size)
|
||||
|
||||
# Set output directory for per-tile mosaics, organized by grid size
|
||||
# Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif
|
||||
tile_output_base <- file.path(laravel_storage, "weekly_tile_max", grid_size)
|
||||
dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE)
|
||||
|
||||
created_tile_files <- create_weekly_mosaic_from_tiles(
|
||||
dates = dates,
|
||||
merged_final_dir = merged_final_with_grid,
|
||||
tile_output_dir = tile_output_base,
|
||||
field_boundaries = field_boundaries
|
||||
)
|
||||
|
||||
safe_log(paste("✓ Per-tile mosaic creation completed - created",
|
||||
length(created_tile_files), "tile files"))
|
||||
}, error = function(e) {
|
||||
safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
|
||||
traceback()
|
||||
stop("Mosaic creation failed")
|
||||
})
|
||||
|
||||
} else {
|
||||
# SINGLE-FILE APPROACH: Create single weekly mosaic file
|
||||
# This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output
|
||||
tryCatch({
|
||||
safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
|
||||
|
||||
# Set output directory for single-file mosaics
|
||||
single_file_output_dir <- file.path(laravel_storage, "weekly_mosaic")
|
||||
|
||||
created_file <- create_weekly_mosaic(
|
||||
dates = dates,
|
||||
field_boundaries = field_boundaries,
|
||||
daily_vrt_dir = daily_vrt,
|
||||
merged_final_dir = merged_final,
|
||||
output_dir = single_file_output_dir,
|
||||
file_name_tif = file_name_tif,
|
||||
create_plots = FALSE
|
||||
)
|
||||
|
||||
safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
|
||||
}, error = function(e) {
|
||||
safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
|
||||
traceback()
|
||||
stop("Mosaic creation failed")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (sys.nframe() == 0) {
|
||||
main()
|
||||
}
|
||||
|
||||
209
r_app/40_mosaic_creation_per_field.R
Normal file
209
r_app/40_mosaic_creation_per_field.R
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
# ============================================================================
|
||||
# SCRIPT 40: Weekly Mosaic Creation (Per-Field CI Aggregation)
|
||||
# ============================================================================
|
||||
# PURPOSE:
|
||||
# Aggregate daily per-field CI TIFFs into weekly mosaics. Handles multi-date
|
||||
# merging, cloud masking, and produces 5-band weekly output for reporting
|
||||
# and KPI calculations. Supports both per-field and grid-based tile architecture.
|
||||
#
|
||||
# INPUT DATA:
|
||||
# - Source: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif
|
||||
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32)
|
||||
# - Dates: All available dates within week range
|
||||
#
|
||||
# OUTPUT DATA:
|
||||
# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/
|
||||
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI)
|
||||
# - Naming: week_WW_YYYY.tif (WW = ISO week, YYYY = ISO year)
|
||||
#
|
||||
# USAGE:
|
||||
# Rscript 40_mosaic_creation_per_field.R [end_date] [offset] [project]
|
||||
#
|
||||
# Example (Windows PowerShell):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-01-12 7 angata
|
||||
#
|
||||
# PARAMETERS:
|
||||
# - end_date: End date for processing (character, YYYY-MM-DD format, default today)
|
||||
# - offset: Days to look back (numeric, default 7 for one week)
|
||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# CLIENT TYPES:
|
||||
# - cane_supply (ANGATA): Yes - weekly monitoring
|
||||
# - agronomic_support (AURA): Yes - field health reporting
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - Packages: terra, sf, tidyverse, lubridate
|
||||
# - Utils files: parameters_project.R, 40_mosaic_creation_per_field_utils.R
|
||||
# - Input data: Daily per-field CI TIFFs from Script 20
|
||||
# - Data directories: field_tiles_CI/, weekly_mosaic/
|
||||
#
|
||||
# NOTES:
|
||||
# - Aggregation method: Maximum CI value per pixel across week (handles clouds)
|
||||
# - ISO week-year used for consistent date handling across year boundaries
|
||||
# - Supports both single-file and tiled mosaic architectures
|
||||
# - Output feeds Scripts 80 (KPI calculations) and 90/91 (reporting)
|
||||
# - Critical for trend analysis: week-over-week CI comparison
|
||||
#
|
||||
# RELATED ISSUES:
|
||||
# SC-112: Script 40 cleanup (deleted legacy mosaic utils files)
|
||||
# SC-108: Core pipeline improvements
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
# 1. Load required packages
|
||||
# -----------------------
|
||||
suppressPackageStartupMessages({
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(here)
|
||||
})
|
||||
|
||||
# 2. Main execution function
|
||||
# -------------------------
|
||||
main <- function() {
|
||||
|
||||
cat("\n")
|
||||
cat("========================================================\n")
|
||||
cat(" Script 40: Per-Field Weekly Mosaic Creation\n")
|
||||
cat("========================================================\n\n")
|
||||
|
||||
# Capture command line arguments
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
# ==== Process Arguments ====
|
||||
|
||||
# Project directory
|
||||
if (length(args) >= 3 && !is.na(args[3])) {
|
||||
project_dir <- as.character(args[3])
|
||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
||||
} else {
|
||||
project_dir <- "angata"
|
||||
message("[INFO] No project_dir provided. Using default: angata")
|
||||
}
|
||||
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
message(paste("[INFO] Project:", project_dir))
|
||||
|
||||
# End date
|
||||
if (length(args) >= 1 && !is.na(args[1])) {
|
||||
end_date <- as.Date(args[1], format = "%Y-%m-%d")
|
||||
if (is.na(end_date)) {
|
||||
message("[WARNING] Invalid end_date. Using current date.")
|
||||
end_date <- Sys.Date()
|
||||
}
|
||||
} else {
|
||||
end_date <- Sys.Date()
|
||||
message(paste("[INFO] No end_date provided. Using current date:", format(end_date)))
|
||||
}
|
||||
|
||||
# Offset (days back)
|
||||
if (length(args) >= 2 && !is.na(args[2])) {
|
||||
offset <- as.numeric(args[2])
|
||||
if (is.na(offset) || offset <= 0) {
|
||||
message("[WARNING] Invalid offset. Using default: 7 days")
|
||||
offset <- 7
|
||||
}
|
||||
} else {
|
||||
offset <- 7
|
||||
message("[INFO] No offset provided. Using default: 7 days")
|
||||
}
|
||||
|
||||
# ==== Load Configuration ====
|
||||
|
||||
# Set working directory if needed
|
||||
if (basename(getwd()) == "r_app") {
|
||||
setwd("..")
|
||||
}
|
||||
|
||||
tryCatch({
|
||||
source("r_app/parameters_project.R")
|
||||
message("[INFO] ✓ Loaded parameters_project.R")
|
||||
}, error = function(e) {
|
||||
stop("[ERROR] Failed to load parameters_project.R: ", e$message)
|
||||
})
|
||||
|
||||
tryCatch({
|
||||
source("r_app/40_mosaic_creation_per_field_utils.R")
|
||||
message("[INFO] ✓ Loaded 40_mosaic_creation_per_field_utils.R")
|
||||
}, error = function(e) {
|
||||
stop("[ERROR] Failed to load utilities: ", e$message)
|
||||
})
|
||||
|
||||
# ==== Get Project Directories ====
|
||||
|
||||
setup <- setup_project_directories(project_dir)
|
||||
|
||||
# Determine input/output directories
|
||||
# Input: field_tiles_CI/ (from Script 20)
|
||||
field_tiles_ci_dir <- setup$field_tiles_ci_dir
|
||||
|
||||
# Output: weekly_mosaic/ (for Scripts 90/91)
|
||||
weekly_mosaic_output_dir <- file.path(setup$laravel_storage_dir, "weekly_mosaic")
|
||||
|
||||
message(paste("[INFO] Input directory:", field_tiles_ci_dir))
|
||||
message(paste("[INFO] Output directory:", weekly_mosaic_output_dir))
|
||||
|
||||
# ==== Validate Input Directory ====
|
||||
|
||||
if (!dir.exists(field_tiles_ci_dir)) {
|
||||
stop(paste("[ERROR] Input directory not found:", field_tiles_ci_dir,
|
||||
"\nScript 20 (CI extraction) must be run first to create per-field TIFFs."))
|
||||
}
|
||||
|
||||
# Check if directory has any TIFFs
|
||||
field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
|
||||
if (length(field_dirs) == 0) {
|
||||
stop(paste("[ERROR] No field subdirectories found in:", field_tiles_ci_dir))
|
||||
}
|
||||
|
||||
message(paste("[INFO] Found", length(field_dirs), "field directories"))
|
||||
|
||||
# ==== Generate Date Range ====
|
||||
|
||||
dates <- date_list(end_date, offset)
|
||||
|
||||
# Validate week calculation
|
||||
message(sprintf("[INFO] Requested offset: %d days", offset))
|
||||
message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d")))
|
||||
message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d")))
|
||||
message(sprintf("[INFO] Calculating ISO week: %d", dates$week))
|
||||
message(sprintf("[INFO] Calculating ISO year: %d", dates$year))
|
||||
|
||||
# ==== Create Per-Field Weekly Mosaics ====
|
||||
|
||||
created_files <- create_all_field_weekly_mosaics(
|
||||
dates = dates,
|
||||
field_tiles_ci_dir = field_tiles_ci_dir,
|
||||
output_dir = weekly_mosaic_output_dir
|
||||
)
|
||||
|
||||
# ==== Summary ====
|
||||
|
||||
message("\n")
|
||||
message("========================================================")
|
||||
message(paste(" COMPLETED"))
|
||||
message(paste(" Created:", length(created_files), "weekly field mosaics"))
|
||||
message("========================================================\n")
|
||||
|
||||
if (length(created_files) > 0) {
|
||||
message("[SUCCESS] Weekly mosaics ready for reporting (Scripts 90/91)")
|
||||
} else {
|
||||
message("[WARNING] No mosaics created - check input data")
|
||||
}
|
||||
|
||||
return(invisible(created_files))
|
||||
}
|
||||
|
||||
# Execute main if script is run directly
|
||||
if (sys.nframe() == 0) {
|
||||
tryCatch({
|
||||
created <- main()
|
||||
quit(save = "no", status = 0)
|
||||
}, error = function(e) {
|
||||
message(paste("\n[FATAL ERROR]", e$message))
|
||||
quit(save = "no", status = 1)
|
||||
})
|
||||
}
|
||||
268
r_app/40_mosaic_creation_per_field_utils.R
Normal file
268
r_app/40_mosaic_creation_per_field_utils.R
Normal file
|
|
@ -0,0 +1,268 @@
|
|||
# MOSAIC_CREATION_PER_FIELD_UTILS.R
|
||||
# ==================================
|
||||
# Utility functions for creating per-field weekly mosaics from per-field daily TIFFs.
|
||||
#
|
||||
# This module aggregates daily per-field 5-band TIFFs (R,G,B,NIR,CI from Script 20)
|
||||
# into weekly per-field mosaics using MAX compositing.
|
||||
#
|
||||
# DATA FLOW:
|
||||
# Script 20: field_tiles_CI/{FIELD}/{DATE}.tif (5-band daily, per-field)
|
||||
# ↓
|
||||
# Script 40 NEW (this module):
|
||||
# For each field:
|
||||
# For each week:
|
||||
# - Find all daily TIFFs for that week
|
||||
# - Stack & create MAX composite
|
||||
# - Save: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
|
||||
# ↓
|
||||
# Scripts 90/91: Read weekly_mosaic/{FIELD}/week_WW_YYYY.tif (unchanged interface)
|
||||
|
||||
#' Build the ISO-week date range used for weekly mosaic processing
#'
#' @param end_date Date object or YYYY-MM-DD string marking the last day
#' @param offset Number of days the window spans, inclusive of end_date
#'   (typically 7 for one week)
#' @return List with week, year, start_date, end_date and days_filter
#'   (character vector of YYYY-MM-DD strings)
#'
date_list <- function(end_date, offset) {
  # Coerce string input to Date, rejecting anything unparseable
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date. Expected Date object or YYYY-MM-DD string.")
    }
  }

  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset. Expected positive number.")
  }

  # The window includes end_date itself, so look back (offset - 1) days
  lookback_days <- offset - 1
  start_date <- end_date - lubridate::days(lookback_days)

  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)

  # Warn when the window straddles an ISO-week boundary; the END date wins
  start_week <- lubridate::isoweek(start_date)
  start_year <- lubridate::isoyear(start_date)
  if (start_week != week || start_year != year) {
    safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. Using END date week %d/%d.",
                     start_week, start_year, week, year, week, year), "WARNING")
  }

  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")

  safe_log(paste("Date range:", start_date, "to", end_date,
                 "(week", week, "of", year, ")"))

  list(
    week = week,
    year = year,
    start_date = start_date,
    end_date = end_date,
    days_filter = days_filter
  )
}
|
||||
|
||||
#' Locate per-field daily TIFFs that fall inside a given week
#'
#' @param field_tiles_ci_dir Base directory containing one subdirectory per
#'   field (e.g., field_tiles_CI/)
#' @param days_filter Character vector of YYYY-MM-DD dates to match
#' @return Named list mapping field name to a sorted vector of matching TIFF
#'   paths; empty list when the directory or fields are missing
#'
find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) {
  if (!dir.exists(field_tiles_ci_dir)) {
    safe_log(paste("Field TIFFs directory not found:", field_tiles_ci_dir), "WARNING")
    return(list())
  }

  field_names <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
  if (length(field_names) == 0) {
    safe_log("No field subdirectories found in field_tiles_CI/", "WARNING")
    return(list())
  }

  # Single alternation pattern for all dates, hoisted out of the loop
  date_pattern <- paste(days_filter, collapse = "|")

  result <- list()
  for (field_name in field_names) {
    candidates <- list.files(file.path(field_tiles_ci_dir, field_name),
                             pattern = "\\.tif$", full.names = TRUE)
    hits <- candidates[grepl(date_pattern, candidates)]
    if (length(hits) > 0) {
      result[[field_name]] <- sort(hits)
    }
  }

  safe_log(paste("Found TIFFs for", length(result), "fields in week"))

  return(result)
}
|
||||
|
||||
#' Create weekly MAX composite for a single field
#'
#' Loads all daily TIFFs for a field+week combination and creates a MAX
#' composite (per-band maximum across all days).
#'
#' @param tiff_files Vector of TIFF file paths for this field+week
#' @param field_name Name of the field (for logging)
#' @return SpatRaster with 5 bands (R,G,B,NIR,CI), or NULL if fails
#'
create_field_weekly_composite <- function(tiff_files, field_name) {

  if (length(tiff_files) == 0) {
    return(NULL)
  }

  tryCatch({
    # Load all TIFFs; unreadable files are skipped but now logged.
    # FIX: the previous handler swallowed load errors silently (its comment
    # claimed they were "already counted", which was not the case), making
    # missing days in a composite impossible to diagnose.
    rasters <- list()
    for (file in tiff_files) {
      tryCatch({
        r <- terra::rast(file)
        rasters[[length(rasters) + 1]] <- r
      }, error = function(e) {
        safe_log(paste("Could not load", basename(file), "for field",
                       field_name, ":", e$message), "WARNING")
      })
    }

    if (length(rasters) == 0) {
      return(NULL)
    }

    # MAX composite: a single day passes through unchanged; otherwise take
    # the per-pixel, per-band maximum across the stacked days
    if (length(rasters) == 1) {
      composite <- rasters[[1]]
    } else {
      collection <- terra::sprc(rasters)
      composite <- terra::mosaic(collection, fun = "max")
    }

    # Normalize to the expected 5-band layout (R,G,B,NIR,CI)
    if (terra::nlyr(composite) >= 5) {
      composite <- terra::subset(composite, 1:5)
      names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
    }

    return(composite)

  }, error = function(e) {
    safe_log(paste("Error creating composite for field", field_name, ":", e$message), "ERROR")
    return(NULL)
  })
}
|
||||
|
||||
#' Persist a per-field weekly mosaic to disk
#'
#' @param raster SpatRaster to save (NULL is tolerated and returns NULL)
#' @param output_dir Base output directory
#'   (e.g., laravel_app/storage/app/{project}/weekly_mosaic/)
#' @param field_name Name of the field (becomes a subdirectory)
#' @param week ISO week number
#' @param year ISO year
#' @return Path of the written TIFF, or NULL on failure
#'
save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year) {
  if (is.null(raster)) {
    return(NULL)
  }

  tryCatch({
    # Layout: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
    target_dir <- file.path(output_dir, field_name)
    dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)
    target_path <- file.path(target_dir, sprintf("week_%02d_%04d.tif", week, year))

    terra::writeRaster(raster, target_path, overwrite = TRUE)

    return(target_path)
  }, error = function(e) {
    safe_log(paste("Error saving mosaic for field", field_name, ":", e$message), "ERROR")
    return(NULL)
  })
}
|
||||
|
||||
#' Create weekly mosaics for every field that has data in the given week
#'
#' Orchestrates the per-field pipeline: discover daily TIFFs, build a MAX
#' composite per field, and write each composite under the output directory.
#'
#' @param dates List from date_list() - contains week, year, days_filter
#' @param field_tiles_ci_dir Input: field_tiles_CI/ directory
#' @param output_dir Output: weekly_mosaic/ directory
#' @return Character vector of successfully written file paths
#'
create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_dir) {
  safe_log(paste("=== Creating Per-Field Weekly Mosaics ==="))
  safe_log(paste("Week:", dates$week, "Year:", dates$year))

  field_tiffs <- find_per_field_tiffs_for_week(field_tiles_ci_dir, dates$days_filter)
  if (length(field_tiffs) == 0) {
    safe_log("No per-field TIFFs found for this week - returning empty list", "WARNING")
    return(character())
  }

  safe_log(paste("Processing", length(field_tiffs), "fields..."))

  created_files <- character()
  progress <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50)

  field_names <- names(field_tiffs)
  for (idx in seq_along(field_names)) {
    current_field <- field_names[idx]

    # Composite the field's daily TIFFs, then persist; either step may
    # yield NULL, in which case the field is simply skipped
    composite <- create_field_weekly_composite(field_tiffs[[current_field]], current_field)
    if (!is.null(composite)) {
      written <- save_field_weekly_mosaic(composite, output_dir, current_field,
                                          dates$week, dates$year)
      if (!is.null(written)) {
        created_files <- c(created_files, written)
      }
    }

    setTxtProgressBar(progress, idx)
  }

  close(progress)
  cat("\n")  # progress bar does not emit its own trailing newline
  safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))

  return(created_files)
}
|
||||
|
|
@ -1,286 +0,0 @@
|
|||
# MOSAIC_CREATION_TILE_UTILS.R
|
||||
# ============================
|
||||
# Tile-based processing utilities for scalable weekly mosaic creation
|
||||
#
|
||||
# STRATEGY: Memory-efficient, scalable approach for large study areas
|
||||
# Split daily full-extent mosaics into fixed-size tiles (e.g., 5×5 km),
|
||||
# then process each tile position across all days, and reassemble.
|
||||
#
|
||||
# WORKFLOW:
|
||||
# Daily full-extent TIFF
|
||||
# ↓
|
||||
# Split into N×M tiles (based on area size / tile_size)
|
||||
# E.g., 50×80 km area with 5 km tiles = 10×16 = 160 tiles
|
||||
# ↓
|
||||
# For EACH TILE POSITION (1 to 160):
|
||||
# - Load that tile from all 7 days
|
||||
# - Create MAX composite for that tile
|
||||
# - Save tile_NNN_MAX.tif
|
||||
# ↓
|
||||
# Reassemble all 160 MAX tiles back into full-extent
|
||||
# ↓
|
||||
# Save weekly full-extent mosaic
|
||||
#
|
||||
# KEY BENEFIT: Memory usage ~50 MB per tile (5×5 km), not 1.3 GB (full 50×80 km)
|
||||
# Scales automatically: bigger area = more tiles, same memory per tile
|
||||
#
|
||||
# TILE_SIZE: Configurable (default 5000 m = 5 km, adjustable via parameter)
|
||||
|
||||
#' Split a raster into fixed-size tiles via terra::makeTiles()
#'
#' The tile count is derived from the raster extent and the desired tile
#' SIZE (not a fixed grid count), so a larger study area automatically
#' yields more tiles.
#'
#' @param raster_path Path to the raster to split
#' @param output_dir Directory in which the tile files are written
#' @param tile_size_m Tile edge length in meters (default: 5000 m = 5 km)
#' @return List with tiles (file paths), n_tiles, n_tiles_x, n_tiles_y,
#'   extent and the source raster path
#'
split_raster_into_tiles <- function(raster_path, output_dir, tile_size_m = 5000) {
  src <- terra::rast(raster_path)

  # Grid dimensions from extent / tile size, rounded up to cover the edges
  span_x <- terra::ext(src)$xmax - terra::ext(src)$xmin
  span_y <- terra::ext(src)$ymax - terra::ext(src)$ymin
  n_tiles_x <- ceiling(span_x / tile_size_m)
  n_tiles_y <- ceiling(span_y / tile_size_m)

  safe_log(paste("Splitting into", n_tiles_x, "×", n_tiles_y, "tiles of",
                 tile_size_m / 1000, "km"))

  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

  # makeTiles() takes n = c(rows, cols) and appends indices to the template
  tile_paths <- terra::makeTiles(
    src,
    n = c(n_tiles_y, n_tiles_x),
    filename = file.path(output_dir, "tile_.tif"),
    overwrite = TRUE
  )

  safe_log(paste("Created", length(tile_paths), "tile files"))

  return(list(
    tiles = tile_paths,
    n_tiles = length(tile_paths),
    n_tiles_x = n_tiles_x,
    n_tiles_y = n_tiles_y,
    extent = terra::ext(src),
    raster = raster_path
  ))
}
|
||||
|
||||
|
||||
#' Create weekly MAX mosaic using TRUE tile-based processing
#'
#' TILE-BASED WORKFLOW (memory efficient):
#'   1. Calculate tile grid dynamically from extent and a FIXED tile_size (e.g., 5 km)
#'   2. For EACH TILE across all days in the week:
#'      - Load that tile from each daily file (small, not the full extent)
#'      - Create a MAX composite for just that tile
#'   3. Reassemble all tiles into the final full-extent mosaic
#'
#' SCALABILITY: fixed tile size means a bigger area simply gets more tiles.
#'
#' @param dates List from date_list() with days_filter
#' @param merged_final_dir Directory with daily merged full-extent TIFFs
#' @param final_output_dir Directory to store final reassembled mosaic
#' @param file_name_tif Output filename for final mosaic
#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km)
#' @return Path to final reassembled weekly mosaic, or NULL when no daily
#'   files are found / no tiles could be processed
#'
create_weekly_mosaic_tiled <- function(dates, merged_final_dir,
                                       final_output_dir, file_name_tif,
                                       tile_size_m = 5000) {

  # Get daily files for this week (filter by the week's date strings)
  daily_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")
  daily_files <- daily_files[grepl(paste(dates$days_filter, collapse = "|"), daily_files)]

  if (length(daily_files) == 0) {
    safe_log("No daily files found for this week", "ERROR")
    return(NULL)
  }

  safe_log(paste("Found", length(daily_files), "daily files for week", dates$week))

  # Load first daily file to get extent and calculate tile grid; all daily
  # files are assumed to share this extent/CRS — TODO confirm upstream
  safe_log("Step 1: Loading first daily file to establish tile structure")
  first_raster <- terra::rast(daily_files[1])

  # Get CRS and convert tile_size_m to appropriate units
  raster_crs <- terra::crs(first_raster)

  # If raster is in lat/lon (geographic), convert tile_size_m to degrees.
  # Heuristic string match on the WKT for GEOGCRS / degree units.
  is_geographic <- grepl("GEOGCRS|longlat|geographic|ANGLEUNIT.*degree",
                         as.character(raster_crs), ignore.case = TRUE)

  if (is_geographic) {
    # Geographic CRS - convert meters to degrees (equator approximation:
    # 1 degree ≈ 111 km; longitude tiles shrink toward the poles)
    tile_size_degrees <- tile_size_m / 111000 # 111 km per degree
    safe_log(paste("Raster is in geographic CRS (lat/lon). Converting", tile_size_m / 1000,
                   "km to", round(tile_size_degrees, 4), "degrees"))
  } else {
    # Projected CRS - use meters directly
    tile_size_degrees <- tile_size_m
    safe_log(paste("Raster is in projected CRS. Using", tile_size_m / 1000, "km directly"))
  }

  # Calculate n_tiles dynamically from extent and tile_size
  x_range <- terra::ext(first_raster)$xmax - terra::ext(first_raster)$xmin
  y_range <- terra::ext(first_raster)$ymax - terra::ext(first_raster)$ymin

  n_tiles_x <- ceiling(x_range / tile_size_degrees)
  n_tiles_y <- ceiling(y_range / tile_size_degrees)
  n_tiles_total <- n_tiles_x * n_tiles_y

  safe_log(paste("Step 2: Creating tile grid:", tile_size_m / 1000, "km tiles"))
  safe_log(paste(" Extent:", round(x_range, 4), "° ×", round(y_range, 4), "°"))
  safe_log(paste(" Grid:", n_tiles_y, "rows ×", n_tiles_x, "columns =", n_tiles_total, "tiles"))

  # Calculate tile extents mathematically (no need to save temp files)
  extent <- terra::ext(first_raster)
  x_min <- extent$xmin
  y_min <- extent$ymin

  # Create list of tile extents, row-major; edge tiles are clamped to the
  # raster extent so the grid covers it exactly
  tile_extents <- list()
  tile_idx <- 0

  for (row in 1:n_tiles_y) {
    for (col in 1:n_tiles_x) {
      tile_idx <- tile_idx + 1

      # Calculate this tile's bounds
      tile_xmin <- x_min + (col - 1) * tile_size_degrees
      tile_xmax <- min(tile_xmin + tile_size_degrees, extent$xmax)
      tile_ymin <- y_min + (row - 1) * tile_size_degrees
      tile_ymax <- min(tile_ymin + tile_size_degrees, extent$ymax)

      tile_extents[[tile_idx]] <- terra::ext(tile_xmin, tile_xmax, tile_ymin, tile_ymax)
    }
  }

  safe_log(paste("Calculated", length(tile_extents), "tile extents"))

  # Save tiles to Laravel storage directory.
  # NOTE(review): the project name "angata" is hard-coded here — other
  # projects would write temp tiles into angata's storage; confirm and
  # consider parameterizing this path.
  tile_storage_dir <- file.path("laravel_app", "storage", "app", "angata", "daily_tiles")
  dir.create(tile_storage_dir, recursive = TRUE, showWarnings = FALSE)

  # For each tile, load that window from all daily files and create MAX
  safe_log("Step 3: Processing tiles (load per-tile across all days, create MAX for each)")
  tile_files_list <- list()

  for (tile_idx in seq_along(tile_extents)) {
    # Log roughly every 10% of progress (and the first tile)
    if (tile_idx %% max(1, ceiling(n_tiles_total / 10)) == 0 || tile_idx == 1) {
      safe_log(paste(" Processing tile", tile_idx, "of", n_tiles_total))
    }

    # Get this tile's extent
    tile_extent <- tile_extents[[tile_idx]]

    # Load and crop all daily files to this tile extent; a failed crop for
    # one day just leaves that day's slot out of the stack
    daily_tile_data <- list()
    for (file_idx in seq_along(daily_files)) {
      tryCatch({
        r <- terra::rast(daily_files[file_idx])
        cropped <- terra::crop(r, tile_extent, snap = "in")
        daily_tile_data[[file_idx]] <- cropped
      }, error = function(e) {
        safe_log(paste("Warning: Could not crop tile", tile_idx, "from",
                       basename(daily_files[file_idx])), "WARNING")
      })
    }

    if (length(daily_tile_data) == 0) {
      safe_log(paste("No valid data for tile", tile_idx), "WARNING")
      next
    }

    # Create MAX composite for this tile (single day passes through)
    if (length(daily_tile_data) == 1) {
      tile_max <- daily_tile_data[[1]]
    } else {
      collection <- terra::sprc(daily_tile_data)
      tile_max <- terra::mosaic(collection, fun = "max")
    }

    # Save tile to disk (keeps memory low)
    tile_file <- file.path(tile_storage_dir, sprintf("tile_%04d.tif", tile_idx))
    terra::writeRaster(tile_max, tile_file, overwrite = TRUE)
    tile_files_list[[tile_idx]] <- tile_file
  }

  if (length(tile_files_list) == 0) {
    safe_log("No tiles processed successfully", "ERROR")
    return(NULL)
  }

  safe_log(paste("Step 4: Reassembling", length(tile_files_list), "tiles from disk into full-extent mosaic"))

  # Load all tile files and reassemble; tiles are disjoint, so "first" is
  # effectively a paste-together
  tile_rasters <- lapply(tile_files_list, terra::rast)
  collection <- terra::sprc(tile_rasters)
  final_mosaic <- terra::mosaic(collection, fun = "first")

  safe_log("Step 5: Saving final reassembled mosaic")

  # Save
  dir.create(final_output_dir, recursive = TRUE, showWarnings = FALSE)
  final_file_path <- file.path(final_output_dir, file_name_tif)

  tryCatch({
    terra::writeRaster(final_mosaic, final_file_path, overwrite = TRUE)
    safe_log(paste("✓ Weekly mosaic saved to:", final_file_path))
  }, error = function(e) {
    safe_log(paste("Error saving mosaic:", e$message), "ERROR")
    # NOTE(review): this return() only exits the error handler, not the
    # enclosing function — after a failed write the function still cleans
    # up and returns final_file_path pointing at a missing/partial file.
    # Verify whether callers should instead receive NULL here.
    return(NULL)
  })

  # Cleanup temporary tile files
  unlink(tile_storage_dir, recursive = TRUE)
  safe_log("Cleaned up temporary tile files")

  return(final_file_path)
}
|
||||
|
||||
#' Load tile MAX rasters for a specific week (for per-tile analysis)
#'
#' @param week Week number
#' @param tile_dir Directory containing tile mosaics (week subdirectories)
#' @return Named list of tile SpatRasters (names tile_###, ascending ID),
#'   or NULL when the week directory or its tile files are missing
#'
load_tile_mosaics <- function(week, tile_dir) {
  week_dir <- file.path(tile_dir, paste0("week_", week))

  if (!dir.exists(week_dir)) {
    warning(paste("Tile directory not found:", week_dir))
    return(NULL)
  }

  # Load all tile files
  tile_files <- list.files(week_dir, pattern = "^tile_.*\\.tif$", full.names = TRUE)

  if (length(tile_files) == 0) {
    # FIX: warning() concatenates its ... arguments with no separator, so
    # the original call produced "...found in:/path"; paste() restores the
    # intended message and matches the sibling warning above
    warning(paste("No tile files found in:", week_dir))
    return(NULL)
  }

  # Sort by numeric tile ID so tile_2 orders before tile_10
  tile_numbers <- as.numeric(gsub(".*tile_(\\d+).*", "\\1", tile_files))
  tile_files <- tile_files[order(tile_numbers)]

  # Load rasters and name entries tile_### in ascending-ID order
  tiles_list <- lapply(tile_files, terra::rast)
  names(tiles_list) <- sprintf("tile_%03d", sort(tile_numbers))

  safe_log(paste("Loaded", length(tiles_list), "tile mosaics for week", week))

  return(tiles_list)
}
|
||||
|
|
@ -1,827 +0,0 @@
|
|||
# MOSAIC_CREATION_UTILS.R
|
||||
# ======================
|
||||
# Utility functions for creating weekly mosaics from daily satellite imagery.
|
||||
# These functions support cloud cover assessment, date handling, and mosaic creation.
|
||||
|
||||
#' Detect whether a project uses tile-based or single-file mosaic approach
#'
#' @param merged_final_tif_dir Directory containing merged_final_tif files
#' @return List with has_tiles (logical), detected_tiles (character vector)
#'   and total_files (count)
#'
detect_mosaic_mode <- function(merged_final_tif_dir) {
  empty_result <- list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)

  # Missing directory or no TIFFs at all -> trivially no tiles
  if (!dir.exists(merged_final_tif_dir)) {
    return(empty_result)
  }

  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(empty_result)
  }

  # Tile naming convention: name ends in _XX.tif with a 2-digit index,
  # e.g. 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif
  tile_matches <- tif_files[grepl("_(\\d{2})\\.tif$", tif_files)]

  list(
    has_tiles = length(tile_matches) > 0,
    detected_tiles = tile_matches,
    total_files = length(tif_files)
  )
}
|
||||
|
||||
#' Safe logging function
#'
#' Delegates to the project-wide log_message() when it has been sourced;
#' otherwise falls back to base R's warning()/message().
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  if (exists("log_message")) {
    log_message(message, level)
  } else if (level %in% c("ERROR", "WARNING")) {
    warning(message)
  } else {
    message(message)
  }
}
|
||||
|
||||
#' Generate a sequence of dates for processing
#'
#' @param end_date The end date for the sequence (Date object or a string
#'   convertible to Date)
#' @param offset Number of days to look back from end_date, inclusive
#' @return A list containing week number, year, start/end dates, and a
#'   character sequence of dates for filtering
#'
date_list <- function(end_date, offset) {
  # Accept strings by coercing to Date; reject unparseable input
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }

  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }

  # The window includes end_date itself, so look back (offset - 1) days
  lookback <- offset - 1
  start_date <- end_date - lubridate::days(lookback)

  # ISO week/year are taken from the END date
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)

  # One YYYY-MM-DD string per day in the window, for filename filtering
  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")

  safe_log(paste("Date range generated from", start_date, "to", end_date))

  list(
    "week" = week,
    "year" = year,
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  )
}
|
||||
|
||||
#' Create a weekly mosaic from available VRT files
#'
#' @param dates List from date_list() with date range info
#' @param field_boundaries Field boundaries for image cropping
#' @param daily_vrt_dir Directory containing VRT files
#' @param merged_final_dir Directory with merged final rasters
#' @param output_dir Output directory for weekly mosaics
#' @param file_name_tif Output filename for the mosaic
#' @param create_plots Whether to create visualization plots (default: FALSE)
#' @return The file path of the saved mosaic
#'
create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir,
                                 merged_final_dir, output_dir, file_name_tif,
                                 create_plots = FALSE) {
  # Candidate inputs: VRTs for the date range and the merged TIFF fallbacks
  vrt_list <- find_vrt_files(daily_vrt_dir, dates)
  raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")

  if (length(vrt_list) == 0) {
    safe_log("No VRT files available for the date range, creating empty mosaic with NA values", "WARNING")

    if (length(raster_files_final) == 0) {
      stop("No VRT files or final raster files available to create mosaic")
    }

    # All-NA placeholder cropped to the fields, so downstream scripts still
    # find a mosaic file for this week
    mosaic <- terra::rast(raster_files_final[1])
    mosaic <- terra::setValues(mosaic, NA)
    mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE)
    names(mosaic) <- c("Red", "Green", "Blue", "NIR", "CI")
  } else {
    safe_log("VRT list created, assessing cloud cover for mosaic creation")

    # Cloud statistics drive which daily images get composited
    cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries)
    mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries)
  }

  # save_mask = FALSE: mask sidecar files would break other scripts
  file_path <- save_mosaic(mosaic, output_dir, file_name_tif, create_plots, save_mask = FALSE)

  safe_log(paste("Weekly mosaic processing completed for week", dates$week))

  return(file_path)
}
|
||||
|
||||
#' Find VRT files within a date range
#'
#' @param vrt_directory Directory containing VRT files
#' @param dates List from date_list() function containing days_filter
#' @return Character vector of VRT file paths (empty when none found)
#'
find_vrt_files <- function(vrt_directory, dates) {
  vrt_files <- list.files(here::here(vrt_directory), full.names = TRUE)

  if (length(vrt_files) == 0) {
    warning("No VRT files found in directory: ", vrt_directory)
    return(character(0))
  }

  # Keep only files whose name contains one of the requested dates
  per_date_matches <- purrr::map(dates$days_filter,
                                 ~ vrt_files[grepl(pattern = .x, x = vrt_files)])
  vrt_list <- purrr::flatten_chr(purrr::compact(per_date_matches))

  safe_log(paste("Found", length(vrt_list), "VRT files for the date range"))

  return(vrt_list)
}
|
||||
|
||||
#' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files
#'
#' For each daily TIF corresponding to a VRT, computes the fraction of
#' missing (cloud) pixels in the CI band, preferably restricted to the
#' field boundaries, and returns one row of statistics per file.
#'
#' @param vrt_list List of VRT file paths (used to extract dates for TIF file lookup)
#' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif)
#' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only
#' @return Data frame with aggregated cloud statistics for each TIF file
#'   (used for mosaic selection), or NULL when inputs are missing or the
#'   assessment fails entirely
#'
count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) {
  if (length(vrt_list) == 0) {
    warning("No VRT files provided for cloud coverage calculation")
    return(NULL)
  }

  tryCatch({
    # Extract dates from VRT filenames to find corresponding TIF files
    # VRT filenames are like "merged2025-12-18.vrt", TIF filenames are like "2025-12-18.tif"
    tif_dates <- gsub(".*([0-9]{4}-[0-9]{2}-[0-9]{2}).*", "\\1", basename(vrt_list))

    # Build list of actual TIF files to use
    tif_files <- paste0(here::here(merged_final_dir), "/", tif_dates, ".tif")

    # Check which TIF files exist; silently drop the missing ones below
    tif_exist <- file.exists(tif_files)
    if (!any(tif_exist)) {
      warning("No TIF files found in directory: ", merged_final_dir)
      return(NULL)
    }

    tif_files <- tif_files[tif_exist]
    safe_log(paste("Found", length(tif_files), "TIF files for cloud coverage assessment"))

    # Initialize list to store aggregated results (one data frame per file)
    aggregated_results <- list()

    # Process each TIF file
    for (tif_idx in seq_along(tif_files)) {
      tif_file <- tif_files[tif_idx]

      tryCatch({
        # Load the TIF file (typically has 5 bands: R, G, B, NIR, CI)
        current_raster <- terra::rast(tif_file)

        # Extract the CI band (last band)
        ci_band <- current_raster[[terra::nlyr(current_raster)]]

        # Create a unique field mask for THIS raster's extent.
        # This handles cases where rasters have different extents due to
        # missing data. NA sentinels mark "field masking not yet done".
        total_notna <- NA_real_
        total_pixels <- NA_real_

        if (!is.null(field_boundaries)) {
          tryCatch({
            # Create mask specific to this raster's grid
            field_mask <- terra::rasterize(field_boundaries, ci_band, field = 1)

            # Count pixels within field boundaries (for this specific raster)
            total_pixels <- terra::global(field_mask, fun = "notNA")$notNA

            # Cloud coverage calculated only over field areas
            ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA)
            total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA

          }, error = function(e) {
            # If field mask creation fails, fall back to entire raster
            safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING")
          })
        }

        # If field mask failed (or no boundaries given), use entire raster
        if (is.na(total_notna)) {
          total_notna <- terra::global(ci_band, fun = "notNA")$notNA
          total_pixels <- terra::ncell(ci_band)
        }

        # Calculate cloud coverage percentage (missing = clouds)
        missing_pct <- round(100 - ((total_notna / total_pixels) * 100))

        # NOTE(review): thres_40perc is named "40" but the cutoff is 45 —
        # confirm whether the name or the threshold is the intended value
        aggregated_results[[tif_idx]] <- data.frame(
          filename = tif_file,
          notNA = total_notna,
          total_pixels = total_pixels,
          missing_pixels_percentage = missing_pct,
          thres_5perc = as.integer(missing_pct < 5),
          thres_40perc = as.integer(missing_pct < 45),
          stringsAsFactors = FALSE
        )

      }, error = function(e) {
        safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING")
        # <<- is required here: the handler runs in its own frame, so a
        # plain <- would not update aggregated_results in the loop's frame.
        # Failed files are recorded as 100% missing so they rank last.
        aggregated_results[[tif_idx]] <<- data.frame(
          filename = tif_file,
          notNA = NA_real_,
          total_pixels = NA_real_,
          missing_pixels_percentage = 100,
          thres_5perc = 0,
          thres_40perc = 0,
          stringsAsFactors = FALSE
        )
      })
    }

    # Combine all aggregated results
    aggregated_df <- if (length(aggregated_results) > 0) {
      do.call(rbind, aggregated_results)
    } else {
      data.frame()
    }

    # Log results
    safe_log(paste("Cloud coverage assessment completed for", length(vrt_list), "images"))

    # Return aggregated data only
    return(aggregated_df)

  }, error = function(e) {
    warning("Error in cloud coverage calculation: ", e$message)
    return(NULL)
  })
}
|
||||
|
||||
#' Create a mosaic from merged_final_tif files based on cloud coverage
#'
#' Selects the best available dates (preferring cloud-free images) using the
#' supplied cloud-coverage statistics, then combines them into a per-pixel
#' MAX composite so clouded pixels (low/NA CI) are filled from clearer dates.
#'
#' @param tif_files List of processed TIF files (5 bands: R, G, B, NIR, CI)
#' @param cloud_coverage_stats Cloud coverage statistics from count_cloud_coverage()
#' @param field_boundaries Field boundaries for masking (optional)
#' @return A SpatRaster object with 5 bands (Red, Green, Blue, NIR, CI), or NULL
#'
create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NULL) {
  # If no TIF files, return NULL
  if (length(tif_files) == 0) {
    safe_log("No TIF files available for mosaic creation", "ERROR")
    return(NULL)
  }

  mosaic_type <- "Unknown"  # Track what type of mosaic is being created

  if (is.null(cloud_coverage_stats) || nrow(cloud_coverage_stats) == 0) {
    safe_log("No cloud coverage statistics available, using all files", "WARNING")
    rasters_to_use <- tif_files
    mosaic_type <- paste("all", length(tif_files), "available images")
  } else {
    # Count how many images meet each cloud threshold.
    # NOTE(review): thres_40perc is computed with `missing_pct < 45` in the
    # counting functions, so the "<40% clouds" wording below is approximate -
    # confirm whether the 5-point slack is intentional.
    num_5perc <- sum(cloud_coverage_stats$thres_5perc, na.rm = TRUE)
    num_40perc <- sum(cloud_coverage_stats$thres_40perc, na.rm = TRUE)

    if (num_5perc > 1) {
      # Multiple images with <5% cloud coverage
      safe_log(paste("Creating max composite from", num_5perc, "cloud-free images (<5% clouds)"))
      mosaic_type <- paste(num_5perc, "cloud-free images (<5% clouds)")
      best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)

    } else if (num_5perc == 1) {
      # Single image with <5% cloud coverage
      safe_log("Using single cloud-free image (<5% clouds)")
      mosaic_type <- "single cloud-free image (<5% clouds)"
      best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)

    } else if (num_40perc > 1) {
      # Multiple images with <40% cloud coverage
      safe_log(paste("Creating max composite from", num_40perc, "partially cloudy images (<40% clouds)"), "WARNING")
      mosaic_type <- paste(num_40perc, "partially cloudy images (<40% clouds)")
      best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)

    } else if (num_40perc == 1) {
      # Single image with <40% cloud coverage
      safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
      mosaic_type <- "single partially cloudy image (<40% clouds)"
      best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)

    } else {
      # No cloud-free images available
      safe_log("No cloud-free images available, using all images", "WARNING")
      mosaic_type <- paste("all", nrow(cloud_coverage_stats), "available images")
      best_coverage <- seq_len(nrow(cloud_coverage_stats))
    }

    # Match selected cloud-stats rows back to TIF paths.
    # FIX: compare basenames on BOTH sides. count_cloud_coverage() stores the
    # full path in $filename while count_cloud_coverage_for_tile() stores the
    # basename; the previous exact comparison against basename(tif_file)
    # silently failed for full-path entries and fell back to "use all files".
    rasters_to_use <- character()
    for (idx in best_coverage) {
      cc_filename <- basename(cloud_coverage_stats$filename[idx])

      matching_tif <- NULL
      for (tif_file in tif_files) {
        if (basename(tif_file) == cc_filename) {
          matching_tif <- tif_file
          break
        }
      }

      if (!is.null(matching_tif)) {
        rasters_to_use <- c(rasters_to_use, matching_tif)
      } else {
        safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", cc_filename), "WARNING")
      }
    }

    if (length(rasters_to_use) == 0) {
      safe_log("Could not match cloud coverage stats to TIF files, using all files", "WARNING")
      rasters_to_use <- tif_files
      mosaic_type <- paste("all", length(tif_files), "available images")
    }
  }

  # Load and mosaic the selected rasters
  if (length(rasters_to_use) == 1) {
    # Single file - just load it
    safe_log(paste("Using single image for mosaic:", basename(rasters_to_use)))
    mosaic <- terra::rast(rasters_to_use[1])
  } else {
    # Multiple files - merge handles different extents/grids automatically
    safe_log(paste("Creating mosaic from", length(rasters_to_use), "images"))

    # Load all rasters with error handling - only keep successful loads
    all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) {
      tryCatch({
        terra::rast(f)
      }, error = function(e) {
        safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING")
        NULL  # Return NULL on error, will be filtered out
      })
    }))

    safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files"))

    if (length(all_rasters) == 0) {
      safe_log("No valid rasters to merge", "WARNING")
      return(NULL)
    }

    if (length(all_rasters) == 1) {
      mosaic <- all_rasters[[1]]
      safe_log("Using single raster after filtering")
    } else {
      # Create max composite: take maximum value at each pixel across all dates.
      # This skips clouds (low/zero CI values) in favor of clear pixels from
      # other dates.
      mosaic <- tryCatch({
        safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds"))

        # Check whether all rasters share an identical grid (extent and
        # resolution) - likely for per-tile mosaics from the same tiling scheme.
        reference_raster <- all_rasters[[1]]
        ref_ext <- terra::ext(reference_raster)
        ref_res <- terra::res(reference_raster)

        grids_match <- all(sapply(all_rasters[-1], function(r) {
          isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) &&
            isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6))
        }))

        if (grids_match) {
          # All rasters have matching grids - no cropping/resampling needed
          safe_log("All rasters have identical grids - stacking directly for max composite")
          raster_collection <- terra::sprc(all_rasters)
          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        } else {
          # Grids differ - crop and resample to a common grid first
          safe_log("Rasters have different grids - cropping and resampling to common extent")

          # Prefer the field-boundaries extent so every date covers the same area
          if (!is.null(field_boundaries)) {
            crop_extent <- terra::ext(field_boundaries)
            safe_log("Using field boundaries extent for consistent area across all dates")
          } else {
            # Union of all extents (covers all data)
            crop_extent <- terra::ext(all_rasters[[1]])
            for (i in 2:length(all_rasters)) {
              crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]]))
            }
            safe_log("Using raster union extent")
          }

          # Crop all rasters to the common extent
          cropped_rasters <- lapply(all_rasters, function(r) {
            terra::crop(r, crop_extent)
          })

          # Resample everything onto the first cropped raster's grid
          reference_grid <- cropped_rasters[[1]]
          aligned_rasters <- lapply(cropped_rasters, function(r) {
            if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) &&
                isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) {
              return(r)  # Already aligned
            }
            terra::resample(r, reference_grid, method = "near")
          })

          raster_collection <- terra::sprc(aligned_rasters)
          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        }

        max_mosaic
      }, error = function(e) {
        safe_log(paste("Max composite creation failed:", e$message), "WARNING")
        safe_log("Using first raster only as fallback")
        all_rasters[[1]]
      })
      safe_log(paste("Max composite created - taking clearest pixel at each location"))
    }
  }

  # Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI.
  # FIX: this normalization used to run only on the multi-image branch, so a
  # single-image mosaic was returned without band selection. It now applies to
  # both paths.
  required_bands <- c("Red", "Green", "Blue", "NIR", "CI")
  available_bands <- names(mosaic)

  if (all(required_bands %in% available_bands)) {
    mosaic <- mosaic[[required_bands]]
    safe_log("Selected Red, Green, Blue, NIR, CI bands")
  } else {
    safe_log(paste("Warning: Not all required bands found. Available:", paste(available_bands, collapse = ", ")), "WARNING")
    safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5 (R, G, B, NIR, CI)"), "WARNING")
    if (terra::nlyr(mosaic) > 5) {
      # Keep only first 5 bands as fallback
      mosaic <- terra::subset(mosaic, 1:5)
      safe_log("Keeping only first 5 bands as fallback")
    }
  }

  # Crop/mask to field boundaries if provided
  if (!is.null(field_boundaries)) {
    tryCatch({
      mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE)
      safe_log("Mosaic cropped to field boundaries")
    }, error = function(e) {
      safe_log(paste("Could not crop to field boundaries:", e$message), "WARNING")
      # Return uncropped mosaic
    })
  }

  # Log final mosaic summary
  safe_log(paste("✓ Mosaic created from", mosaic_type, "-", terra::nlyr(mosaic),
                 "bands,", nrow(mosaic), "x", ncol(mosaic), "pixels"))

  return(mosaic)
}
|
||||
|
||||
#' Save a mosaic raster to disk
#'
#' Writes the raster (and, optionally, the cloud mask attached to it as the
#' "cloud_mask" attribute) under `output_dir`, optionally plotting the result
#' for visual inspection.
#'
#' @param mosaic_raster A SpatRaster object to save
#' @param output_dir Directory to save the output
#' @param file_name Filename for the output raster
#' @param plot_result Whether to create visualizations (default: FALSE)
#' @param save_mask Whether to save cloud masks separately (default: FALSE)
#' @return The file path of the saved raster
#'
save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALSE, save_mask = FALSE) {
  # Refuse to proceed without a raster to write.
  if (is.null(mosaic_raster)) {
    stop("No mosaic raster provided to save")
  }

  # Make sure the destination exists, then resolve the target path.
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  out_path <- here::here(output_dir, file_name)

  # A cloud mask may ride along as an attribute on the raster.
  mask_layer <- attr(mosaic_raster, "cloud_mask")

  # Write the main raster.
  terra::writeRaster(mosaic_raster, out_path, overwrite = TRUE)

  # Persist the mask only when requested; otherwise just note its presence.
  if (!is.null(mask_layer)) {
    if (save_mask) {
      # Derive "<name>_mask.tif" by inserting _mask before the extension.
      mask_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
      mask_path <- here::here(output_dir, mask_name)
      terra::writeRaster(mask_layer, mask_path, overwrite = TRUE)
      safe_log(paste("Cloud/shadow mask saved to:", mask_path))
    } else {
      safe_log("Cloud mask available but not saved (save_mask = FALSE)")
    }
  }

  # Optional diagnostic plots.
  if (plot_result) {
    band_names <- names(mosaic_raster)
    has_rgb <- all(c("Red", "Green", "Blue") %in% band_names)

    # CI band on its own.
    if ("CI" %in% band_names) {
      terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
    }

    # True-colour composite.
    if (has_rgb) {
      terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
    }

    # Cloud/shadow mask (red = cloud/shadow, green = clear).
    if (!is.null(mask_layer)) {
      terra::plot(mask_layer, main = paste("Cloud/shadow mask", file_name),
                  col = c("red", "green"))
    }

    # Side-by-side RGB vs mask comparison when both are available.
    if (has_rgb && !is.null(mask_layer)) {
      previous_par <- par(mfrow = c(1, 2))
      terra::plotRGB(mosaic_raster, main = "RGB Image")
      terra::plot(mask_layer, main = "Cloud/Shadow Mask", col = c("red", "green"))
      par(previous_par)
    }
  }

  # Log save completion and hand the path back to the caller.
  safe_log(paste("Mosaic saved to:", out_path))
  return(out_path)
}
|
||||
|
||||
#' Create weekly mosaic from pre-split tiles with MAX aggregation
#'
#' This function processes tiles created by Script 01 and processed by Script 02.
#' For each tile independently:
#' 1. Collects that tile from all dates in the range
#' 2. Calculates cloud coverage per date
#' 3. Uses create_mosaic logic to select best dates (cloud-clean preferred)
#' 4. Creates MAX composite for that tile
#' 5. Saves to weekly_tile_max/week_WW_YYYY_TT.tif
#'
#' Input: merged_final_tif/[DATE]/[TILE_01.tif, TILE_02.tif, ...]
#' Output: weekly_tile_max/week_WW_YYYY_TT.tif (one weekly MAX file per tile)
#'
#' @param dates List from date_list() containing days_filter vector plus week/year
#' @param merged_final_dir Directory containing processed tiles (merged_final_tif)
#' @param tile_output_dir Directory to save weekly MAX tiles (weekly_tile_max)
#' @param field_boundaries Field boundaries for cloud coverage calculation (optional)
#' @return Character vector of paths to created tile files
#'
create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) {

  safe_log("Starting per-tile mosaic creation with cloud-based date selection...")

  # Create output directory if needed
  dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE)

  # Step 1: Discover all tiles from all dates and group by tile ID.
  # Structure: tile_groups$`01` = list of files for that tile across dates.
  tile_groups <- list()

  for (date in dates$days_filter) {
    date_dir <- file.path(merged_final_dir, date)

    if (!dir.exists(date_dir)) {
      safe_log(paste("  Skipping date:", date, "- directory not found"), "WARNING")
      next
    }

    tile_files <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE)

    if (length(tile_files) == 0) {
      safe_log(paste("  No tiles found for date:", date), "WARNING")
      next
    }

    # Extract tile ID from each filename (e.g., "2026-01-02_01.tif" -> "01")
    for (tile_file in tile_files) {
      tile_basename <- basename(tile_file)

      # FIX: gsub() returns its input unchanged when the pattern does not
      # match, which silently turned the whole filename into a bogus "tile
      # ID" and created spurious groups. Validate the pattern first and skip
      # non-conforming files with a warning instead.
      if (!grepl("_[0-9]+\\.tif$", tile_basename)) {
        safe_log(paste("  Skipping file with unexpected name:", tile_basename), "WARNING")
        next
      }
      tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", tile_basename)

      if (!tile_id %in% names(tile_groups)) {
        tile_groups[[tile_id]] <- list()
      }
      tile_groups[[tile_id]][[length(tile_groups[[tile_id]]) + 1]] <- tile_file
    }
  }

  if (length(tile_groups) == 0) {
    stop("No tiles found in date range")
  }

  safe_log(paste("Found", length(tile_groups), "unique tiles across all dates"))

  # Step 2: Process each tile independently
  created_tiles <- character()

  for (tile_id in names(tile_groups)) {
    tile_files_for_this_id <- unlist(tile_groups[[tile_id]])

    safe_log(paste("Processing tile", tile_id, "with", length(tile_files_for_this_id), "dates"))

    # Step 2a: Calculate cloud coverage for this tile across all dates
    cloud_stats_this_tile <- tryCatch({
      count_cloud_coverage_for_tile(
        tile_files = tile_files_for_this_id,
        field_boundaries = field_boundaries
      )
    }, error = function(e) {
      safe_log(paste("  Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING")
      NULL
    })

    if (is.null(cloud_stats_this_tile) || nrow(cloud_stats_this_tile) == 0) {
      safe_log(paste("  No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING")
      cloud_stats_this_tile <- NULL
    }

    # Step 2b: Create MAX mosaic for this tile using create_mosaic logic
    tile_mosaic <- tryCatch({
      create_mosaic(
        tif_files = tile_files_for_this_id,
        cloud_coverage_stats = cloud_stats_this_tile,
        field_boundaries = NULL  # Don't crop individual tiles
      )
    }, error = function(e) {
      safe_log(paste("  Error creating mosaic for tile", tile_id, "-", e$message), "WARNING")
      NULL
    })

    if (is.null(tile_mosaic)) {
      safe_log(paste("  Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING")
      next
    }

    # FIX: removed leftover DEBUG logging here - it ran two full-raster
    # terra::global(min/max) scans per tile on every production run, doing
    # expensive work only to produce debug output.

    # Step 2c: Save this tile's weekly MAX mosaic.
    # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif)
    tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_",
                            sprintf("%02d", as.integer(tile_id)), ".tif")
    tile_output_path <- file.path(tile_output_dir, tile_filename)

    tryCatch({
      terra::writeRaster(tile_mosaic, tile_output_path, overwrite = TRUE)
      safe_log(paste("  ✓ Saved tile", tile_id, "to:", tile_filename))
      created_tiles <- c(created_tiles, tile_output_path)
    }, error = function(e) {
      safe_log(paste("  Error saving tile", tile_id, "-", e$message), "WARNING")
    })
  }

  safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir))

  return(created_tiles)
}
|
||||
|
||||
#' Calculate cloud coverage for a single tile across multiple dates
#'
#' Helper function for per-tile cloud assessment.
#' Takes tile files from different dates and calculates cloud coverage for each.
#' Cloud coverage is calculated ONLY within field boundaries (when provided),
#' so total_pixels varies per tile based on which fields fall in that tile area.
#'
#' @param tile_files Character vector of tile file paths from different dates
#' @param field_boundaries Field boundaries for analysis (required for per-field counting)
#' @return Data frame with cloud stats for each date/tile, or NULL if no input
#'
count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) {
  if (length(tile_files) == 0) {
    warning("No tile files provided for cloud coverage calculation")
    return(NULL)
  }

  aggregated_results <- list()

  for (idx in seq_along(tile_files)) {
    tile_file <- tile_files[idx]

    tryCatch({
      # Load the tile
      current_raster <- terra::rast(tile_file)

      # Extract the CI band (last band in 5-band output)
      ci_band <- current_raster[[terra::nlyr(current_raster)]]

      # Calculate cloud coverage within field boundaries
      if (!is.null(field_boundaries)) {
        # Create a reference raster template (same extent/resolution as
        # ci_band, but independent of its data). This ensures field_mask shows
        # the potential field area even if ci_band is entirely cloudy (all NA).
        ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band),
                                    crs = terra::crs(ci_band))
        terra::values(ref_template) <- 1  # Fill entire raster with 1s

        # Crop and mask to field boundaries: keeps 1 inside fields, NA outside.
        # Independent of ci_band's data - represents the potential field area.
        field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE)

        # Count total potential field pixels from the mask (independent of clouds)
        total_pixels <- terra::global(field_mask, fun = "notNA")$notNA

        # Crop/mask the CI band to count actual valid (non-cloud) pixels
        ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE)
        total_notna <- terra::global(ci_masked, fun = "notNA")$notNA
      } else {
        # If no field boundaries provided, use entire tile
        total_notna <- terra::global(ci_band, fun = "notNA")$notNA
        total_pixels <- terra::ncell(ci_band)
      }

      # Cloud coverage percentage (missing = clouds).
      # FIX: guard against a zero/NA denominator (tile entirely outside the
      # field boundaries), which previously produced NaN and poisoned the
      # threshold flags downstream. Treat such tiles as fully clouded.
      missing_pct <- if (is.na(total_pixels) || total_pixels == 0) {
        100
      } else {
        round(100 - ((total_notna / total_pixels) * 100))
      }

      aggregated_results[[idx]] <- data.frame(
        filename = basename(tile_file),  # Keep full filename: 2026-01-07_03.tif
        notNA = total_notna,
        total_pixels = total_pixels,
        missing_pixels_percentage = missing_pct,
        thres_5perc = as.integer(missing_pct < 5),
        # NOTE(review): named "40perc" but allows up to 45% missing - confirm
        # the extra slack is deliberate before tightening the threshold.
        thres_40perc = as.integer(missing_pct < 45),
        stringsAsFactors = FALSE
      )

    }, error = function(e) {
      safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING")
      # FIX: store the basename here as well - the success path stores
      # basename(tile_file), and create_mosaic() matches stats rows to files
      # by filename, so a full path stored here could never be matched.
      aggregated_results[[idx]] <<- data.frame(
        filename = basename(tile_file),
        notNA = NA_real_,
        total_pixels = NA_real_,
        missing_pixels_percentage = 100,
        thres_5perc = 0,
        thres_40perc = 0,
        stringsAsFactors = FALSE
      )
    })
  }

  # Combine results
  aggregated_df <- if (length(aggregated_results) > 0) {
    do.call(rbind, aggregated_results)
  } else {
    data.frame()
  }

  return(aggregated_df)
}
|
||||
|
||||
|
|
@ -1,28 +1,55 @@
|
|||
# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION)
|
||||
# ============================================================================
|
||||
# UNIFIED KPI CALCULATION SCRIPT
|
||||
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
|
||||
# ============================================================================
|
||||
# PURPOSE:
|
||||
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
|
||||
# field uniformity, growth trends (4-week and 8-week), area change detection,
|
||||
# TCH forecasts, stress identification, and weed presence. Generates
|
||||
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
|
||||
#
|
||||
# This script combines:
|
||||
# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses)
|
||||
# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators)
|
||||
# INPUT DATA:
|
||||
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
|
||||
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
|
||||
# - Source 3: Historical field stats (RDS from previous weeks)
|
||||
#
|
||||
# FEATURES:
|
||||
# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.)
|
||||
# - Farm-level KPI calculation (6 metrics for executive overview)
|
||||
# - Parallel processing (tile-aware, 1000+ fields supported)
|
||||
# - Comprehensive Excel + RDS + CSV exports (21 columns per spec)
|
||||
# - Test mode for development
|
||||
|
||||
# CRITICAL INTEGRATIONS:
|
||||
# OUTPUT DATA:
|
||||
# - Destination: laravel_app/storage/app/{project}/output/
|
||||
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
|
||||
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
|
||||
#
|
||||
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
|
||||
# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
|
||||
# Columns: field, imminent_prob, detected_prob, week, year
|
||||
# [✓] LEFT JOIN to field_analysis_df by field
|
||||
# [✓] Use actual harvest probability data instead of placeholder
|
||||
# USAGE:
|
||||
# Rscript 80_calculate_kpis.R [project] [week] [year]
|
||||
#
|
||||
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
|
||||
# [✓] Load harvest.xlsx with planting_date (season_start)
|
||||
# Example (Windows PowerShell):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
|
||||
#
|
||||
# PARAMETERS:
|
||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||
# - week: ISO week number (numeric, 1-53, default current week)
|
||||
# - year: ISO year (numeric, default current year)
|
||||
#
|
||||
# CLIENT TYPES:
|
||||
# - cane_supply (ANGATA): Yes - uses 80_utils_cane_supply.R (placeholder)
|
||||
# - agronomic_support (AURA): Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
|
||||
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
|
||||
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
|
||||
# - Input data: Weekly mosaic TIFFs (Script 40 output)
|
||||
# - Data directories: weekly_mosaic/, output/ (created if missing)
|
||||
#
|
||||
# NOTES:
|
||||
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
|
||||
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
|
||||
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
|
||||
# - Exports: 21-column output with field-level metrics, phase, status, alerts
|
||||
# - Supports 4-week and 8-week trend analysis from historical RDS cache
|
||||
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
|
||||
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
|
||||
#
|
||||
#
|
||||
# ============================================================================
|
||||
# [✓] Extract planting dates per field
|
||||
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
||||
#
|
||||
|
|
@ -96,48 +123,77 @@ WEEKS_FOR_CV_TREND_LONG <- 8
|
|||
# ============================================================================
|
||||
|
||||
suppressPackageStartupMessages({
|
||||
library(here)
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(lubridate)
|
||||
library(readr)
|
||||
library(readxl)
|
||||
library(writexl)
|
||||
library(purrr)
|
||||
library(furrr)
|
||||
library(future)
|
||||
library(caret)
|
||||
library(CAST)
|
||||
library(randomForest)
|
||||
# File path handling
|
||||
library(here) # For relative path resolution (platform-independent file paths)
|
||||
|
||||
# Spatial data handling
|
||||
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||||
library(terra) # For raster operations (reading mosaic TIFFs)
|
||||
|
||||
# Data manipulation
|
||||
library(dplyr) # For data wrangling (filter, mutate, group_by, summarize)
|
||||
library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather)
|
||||
library(lubridate) # For date/time operations (week extraction, date arithmetic)
|
||||
|
||||
# File I/O
|
||||
library(readr) # For reading CSV files (harvest predictions from Python)
|
||||
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
|
||||
library(writexl) # For writing Excel outputs (KPI summary tables)
|
||||
library(progress) # For progress bars during field processing
|
||||
|
||||
# ML/Analysis (optional - only for harvest model inference)
|
||||
tryCatch({
|
||||
library(torch)
|
||||
library(torch) # For PyTorch model inference (harvest readiness prediction)
|
||||
}, error = function(e) {
|
||||
message("Note: torch package not available - harvest model inference will be skipped")
|
||||
})
|
||||
})
|
||||
|
||||
# ============================================================================
|
||||
# LOAD CONFIGURATION - MUST BE DONE FIRST
|
||||
# ============================================================================
|
||||
# Parameters must be loaded early to determine client type and paths
|
||||
|
||||
tryCatch({
|
||||
source(here("r_app", "parameters_project.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading parameters_project.R: ", e$message)
|
||||
})
|
||||
|
||||
# Get client configuration from global project setup
|
||||
# NOTE: This cannot be done until parameters_project.R is sourced
|
||||
# We determine client_type from the current project_dir (if running in main() context)
|
||||
# For now, set a placeholder that will be overridden in main()
|
||||
if (exists("project_dir", envir = .GlobalEnv)) {
|
||||
temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
|
||||
} else {
|
||||
temp_client_type <- "cane_supply" # Safe default
|
||||
}
|
||||
temp_client_config <- get_client_kpi_config(temp_client_type)
|
||||
|
||||
# ============================================================================
|
||||
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
|
||||
# ============================================================================
|
||||
|
||||
# All clients use the common utilities (shared statistical functions, reporting)
|
||||
tryCatch({
|
||||
source(here("r_app", "80_weekly_stats_utils.R"))
|
||||
source(here("r_app", "80_utils_common.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading 80_weekly_stats_utils.R: ", e$message)
|
||||
stop("Error loading 80_utils_common.R: ", e$message)
|
||||
})
|
||||
|
||||
# Load both client-specific utilities (functions will be available for both workflows)
|
||||
# This avoids needing to determine client type at startup time
|
||||
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
|
||||
tryCatch({
|
||||
source(here("r_app", "80_utils_agronomic_support.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
|
||||
})
|
||||
|
||||
tryCatch({
|
||||
source(here("r_app", "80_report_building_utils.R"))
|
||||
source(here("r_app", "80_utils_cane_supply.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading 80_report_building_utils.R: ", e$message)
|
||||
})
|
||||
|
||||
tryCatch({
|
||||
source(here("r_app", "kpi_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading kpi_utils.R: ", e$message)
|
||||
stop("Error loading 80_utils_cane_supply.R: ", e$message)
|
||||
})
|
||||
|
||||
# ============================================================================
|
||||
|
|
@ -176,9 +232,6 @@ STATUS_TRIGGERS <- data.frame(
|
|||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
main <- function() {
|
||||
# Parse command-line arguments
|
||||
|
|
@ -187,7 +240,8 @@ main <- function() {
|
|||
# end_date (arg 1)
|
||||
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
|
||||
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
|
||||
as.Date(args[1])
|
||||
# Parse date explicitly in YYYY-MM-DD format from command line
|
||||
as.Date(args[1], format = "%Y-%m-%d")
|
||||
} else if (exists("end_date", envir = .GlobalEnv)) {
|
||||
global_date <- get("end_date", envir = .GlobalEnv)
|
||||
# Check if it's a valid Date with length > 0
|
||||
|
|
@ -233,7 +287,7 @@ main <- function() {
|
|||
message(strrep("=", 70))
|
||||
message("Date:", format(end_date, "%Y-%m-%d"))
|
||||
message("Project:", project_dir)
|
||||
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
|
||||
message("Mode: Conditional KPI calculation based on client type")
|
||||
message("")
|
||||
|
||||
# Load configuration and utilities
|
||||
|
|
@ -245,48 +299,151 @@ main <- function() {
|
|||
stop("Error loading parameters_project.R: ", e$message)
|
||||
})
|
||||
|
||||
# Initialize project directories and configuration
|
||||
setup <- setup_project_directories(project_dir)
|
||||
|
||||
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
|
||||
client_type <- get_client_type(project_dir)
|
||||
client_config <- get_client_kpi_config(client_type)
|
||||
|
||||
# Assign to global environment so utilities and downstream scripts can access it
|
||||
assign("client_config", client_config, envir = .GlobalEnv)
|
||||
|
||||
message("Client Type:", client_type)
|
||||
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
|
||||
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
|
||||
|
||||
# Use centralized paths from setup object (no need for file.path calls)
|
||||
weekly_mosaic <- setup$weekly_mosaic_dir
|
||||
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||
|
||||
tryCatch({
|
||||
source(here("r_app", "30_growth_model_utils.R"))
|
||||
}, error = function(e) {
|
||||
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
|
||||
})
|
||||
|
||||
# ========== PER-FIELD ANALYSIS (SC-64) ==========
|
||||
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
|
||||
# ============================================
|
||||
|
||||
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
|
||||
# AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
|
||||
message("\n", strrep("=", 70))
|
||||
message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
||||
message(strrep("=", 70))
|
||||
|
||||
# Prepare inputs for KPI calculation (already created by setup_project_directories)
|
||||
reports_dir_kpi <- setup$kpi_reports_dir
|
||||
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
||||
|
||||
# Load field boundaries for AURA workflow (use data_dir from setup)
|
||||
message("\nLoading field boundaries for AURA KPI calculation...")
|
||||
tryCatch({
|
||||
boundaries_result <- load_field_boundaries(setup$data_dir)
|
||||
|
||||
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
||||
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
||||
} else {
|
||||
field_boundaries_sf <- boundaries_result
|
||||
}
|
||||
|
||||
if (nrow(field_boundaries_sf) == 0) {
|
||||
stop("No fields loaded from boundaries")
|
||||
}
|
||||
|
||||
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
|
||||
}, error = function(e) {
|
||||
stop("ERROR loading field boundaries: ", e$message)
|
||||
})
|
||||
|
||||
# Load harvesting data
|
||||
if (!exists("harvesting_data")) {
|
||||
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
|
||||
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
|
||||
}
|
||||
|
||||
# Extract current week/year from end_date
|
||||
current_week <- as.numeric(format(end_date, "%V"))
|
||||
current_year <- as.numeric(format(end_date, "%G"))
|
||||
|
||||
# Call with correct signature
|
||||
kpi_results <- calculate_all_kpis(
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
current_week = current_week,
|
||||
current_year = current_year,
|
||||
current_mosaic_dir = setup$weekly_mosaic_dir,
|
||||
harvesting_data = harvesting_data,
|
||||
ci_rds_path = cumulative_CI_vals_dir,
|
||||
output_dir = reports_dir_kpi
|
||||
)
|
||||
|
||||
cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")
|
||||
cat("Summary tables saved for Script 90 integration\n")
|
||||
cat("Output directory:", reports_dir_kpi, "\n\n")
|
||||
|
||||
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
|
||||
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
|
||||
message("\n", strrep("=", 70))
|
||||
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||
message(strrep("=", 70))
|
||||
|
||||
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
|
||||
reports_dir <- setup$kpi_reports_dir
|
||||
data_dir <- setup$data_dir
|
||||
|
||||
# Continue with existing per-field analysis code below
|
||||
|
||||
message("\n", strrep("-", 70))
|
||||
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
|
||||
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
||||
message(strrep("-", 70))
|
||||
|
||||
# Calculate ISO week numbers and ISO years using helper from kpi_utils.R
|
||||
weeks <- calculate_week_numbers(end_date)
|
||||
current_week <- weeks$current_week
|
||||
current_iso_year <- weeks$current_iso_year
|
||||
current_year <- weeks$current_year
|
||||
previous_week <- weeks$previous_week
|
||||
previous_iso_year <- weeks$previous_iso_year
|
||||
previous_year <- weeks$previous_year
|
||||
|
||||
message(paste("Week:", current_week, "/ ISO Year:", current_iso_year))
|
||||
message(paste("Week:", current_week, "/ Year (ISO 8601):", year))
|
||||
|
||||
# Find tile files - approach from Script 20
|
||||
message("Finding tile files...")
|
||||
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, current_iso_year)
|
||||
# Find per-field weekly mosaics
|
||||
message("Finding per-field weekly mosaics...")
|
||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
||||
|
||||
# Detect grid size subdirectory
|
||||
detected_grid_size <- NA
|
||||
if (dir.exists(weekly_tile_max)) {
|
||||
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
|
||||
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
||||
if (length(grid_patterns) > 0) {
|
||||
detected_grid_size <- grid_patterns[1]
|
||||
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
|
||||
message(paste(" Using grid-size subdirectory:", detected_grid_size))
|
||||
if (!dir.exists(weekly_mosaic)) {
|
||||
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
||||
"\nScript 40 (mosaic creation) must be run first."))
|
||||
}
|
||||
|
||||
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
||||
field_dirs <- field_dirs[field_dirs != ""]
|
||||
|
||||
if (length(field_dirs) == 0) {
|
||||
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
||||
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
||||
}
|
||||
|
||||
# Verify we have mosaics for this week
|
||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
||||
per_field_files <- c()
|
||||
for (field in field_dirs) {
|
||||
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
||||
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
||||
if (length(files) > 0) {
|
||||
per_field_files <- c(per_field_files, files)
|
||||
}
|
||||
}
|
||||
|
||||
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
||||
if (length(tile_files) == 0) {
|
||||
stop(paste("No tile files found for week", current_week, current_iso_year, "in", mosaic_dir))
|
||||
if (length(per_field_files) == 0) {
|
||||
stop(paste("ERROR: No mosaics found for week", current_week, "year", year,
|
||||
"\nExpected pattern:", single_file_pattern,
|
||||
"\nChecked:", weekly_mosaic))
|
||||
}
|
||||
message(paste(" Found", length(tile_files), "tiles"))
|
||||
|
||||
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
||||
|
||||
mosaic_mode <- "per-field"
|
||||
mosaic_dir <- weekly_mosaic
|
||||
|
||||
|
||||
|
||||
# Load field boundaries
|
||||
tryCatch({
|
||||
|
|
@ -316,10 +473,11 @@ main <- function() {
|
|||
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
|
||||
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
||||
|
||||
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir,
|
||||
historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir,
|
||||
num_weeks = num_weeks_to_load,
|
||||
auto_generate = allow_auto_gen,
|
||||
field_boundaries_sf = field_boundaries_sf)
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
daily_vals_dir = daily_vals_dir)
|
||||
|
||||
# Load harvest.xlsx for planting dates (season_start)
|
||||
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
||||
|
|
@ -353,14 +511,15 @@ main <- function() {
|
|||
)
|
||||
}
|
||||
|
||||
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
||||
# ============================================================================
|
||||
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
|
||||
# ============================================================================
|
||||
# Build per-field configuration
|
||||
message("\nPreparing mosaic configuration for statistics calculation...")
|
||||
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
|
||||
|
||||
# Build tile grid (needed by calculate_field_statistics)
|
||||
message("\nBuilding tile grid for current week...")
|
||||
tile_grid <- build_tile_grid(mosaic_dir, current_week, current_iso_year)
|
||||
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
|
||||
field_grid <- list(
|
||||
mosaic_dir = mosaic_dir,
|
||||
mode = "per-field"
|
||||
)
|
||||
|
||||
message("\nUsing modular RDS-based approach for weekly statistics...")
|
||||
|
||||
|
|
@ -371,7 +530,7 @@ main <- function() {
|
|||
year = current_iso_year,
|
||||
project_dir = project_dir,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
mosaic_dir = tile_grid$mosaic_dir,
|
||||
mosaic_dir = field_grid$mosaic_dir,
|
||||
reports_dir = reports_dir,
|
||||
report_date = end_date
|
||||
)
|
||||
|
|
@ -386,10 +545,10 @@ main <- function() {
|
|||
|
||||
prev_stats <- load_or_calculate_weekly_stats(
|
||||
week_num = previous_week,
|
||||
year = previous_iso_year,
|
||||
year = previous_year,
|
||||
project_dir = project_dir,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
mosaic_dir = tile_grid$mosaic_dir,
|
||||
mosaic_dir = field_grid$mosaic_dir,
|
||||
reports_dir = reports_dir,
|
||||
report_date = prev_report_date
|
||||
)
|
||||
|
|
@ -409,9 +568,11 @@ main <- function() {
|
|||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
||||
|
||||
# Load weekly harvest probabilities from script 31 (if available)
|
||||
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
|
||||
message("\n4. Loading harvest probabilities from script 31...")
|
||||
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
|
||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_iso_year))
|
||||
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
|
||||
harvest_prob_file <- file.path(harvest_prob_dir,
|
||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
|
||||
message(paste(" Looking for:", harvest_prob_file))
|
||||
|
||||
imminent_prob_data <- tryCatch({
|
||||
|
|
@ -854,6 +1015,14 @@ main <- function() {
|
|||
cat(" - Per-field data exported\n")
|
||||
cat(" - Farm-level KPIs calculated\n")
|
||||
cat(" - All outputs in:", reports_dir, "\n\n")
|
||||
|
||||
} else {
|
||||
# Unknown client type - log warning and exit
|
||||
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
||||
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
||||
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
||||
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
if (sys.nframe() == 0) {
|
||||
|
|
|
|||
|
|
@ -1,258 +0,0 @@
|
|||
# 80_REPORT_BUILDING_UTILS.R
|
||||
# ============================================================================
|
||||
# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT
|
||||
#
|
||||
# This file contains reusable functions for:
|
||||
# - Field analysis summary generation
|
||||
# - Excel/CSV/RDS export functionality
|
||||
# - Farm-level KPI aggregation and summary
|
||||
# - Tile-based KPI extraction (alternative calculation method)
|
||||
#
|
||||
# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts
|
||||
# ============================================================================
|
||||
|
||||
# ============================================================================
|
||||
# SUMMARY GENERATION
|
||||
# ============================================================================
|
||||
|
||||
generate_field_analysis_summary <- function(field_df) {
  # Build the weekly summary table for the per-field analysis report.
  #
  # Args:
  #   field_df: per-field data frame with (at least) Acreage, Phase,
  #             Status_trigger and Cloud_category columns.
  #
  # Returns:
  #   data.frame with Category / Acreage columns. Because some entries are
  #   labels such as "3 fields", the Acreage column is coerced to character.
  message("Generating summary statistics...")

  # NA-safe total acreage over an arbitrary row mask.
  acres <- function(mask) sum(field_df$Acreage[mask], na.rm = TRUE)
  phase_acres   <- function(p) acres(field_df$Phase == p)
  trigger_acres <- function(t) acres(field_df$Status_trigger == t)
  cloud_acres   <- function(cc) acres(field_df$Cloud_category == cc)
  # Number of fields in a cloud-coverage category.
  cloud_fields  <- function(cc) sum(field_df$Cloud_category == cc, na.rm = TRUE)

  summary_df <- data.frame(
    Category = c(
      "--- PHASE DISTRIBUTION ---",
      "Germination",
      "Tillering",
      "Grand Growth",
      "Maturation",
      "Unknown phase",
      "--- STATUS TRIGGERS ---",
      "Harvest ready",
      "Stress detected",
      "Strong recovery",
      "Growth on track",
      "Germination complete",
      "Germination started",
      "No trigger",
      "--- CLOUD COVERAGE (FIELDS) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- CLOUD COVERAGE (ACREAGE) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- TOTAL ---",
      "Total Acreage"
    ),
    Acreage = c(
      NA,
      round(phase_acres("Germination"), 2),
      round(phase_acres("Tillering"), 2),
      round(phase_acres("Grand Growth"), 2),
      round(phase_acres("Maturation"), 2),
      round(phase_acres("Unknown"), 2),
      NA,
      round(trigger_acres("harvest_ready"), 2),
      round(trigger_acres("stress_detected_whole_field"), 2),
      round(trigger_acres("strong_recovery"), 2),
      round(trigger_acres("growth_on_track"), 2),
      round(trigger_acres("germination_complete"), 2),
      round(trigger_acres("germination_started"), 2),
      round(acres(is.na(field_df$Status_trigger)), 2),
      NA,
      paste0(cloud_fields("Clear view"), " fields"),
      paste0(cloud_fields("Partial coverage"), " fields"),
      paste0(cloud_fields("No image available"), " fields"),
      NA,
      round(cloud_acres("Clear view"), 2),
      round(cloud_acres("Partial coverage"), 2),
      round(cloud_acres("No image available"), 2),
      NA,
      round(sum(field_df$Acreage, na.rm = TRUE), 2)
    ),
    stringsAsFactors = FALSE
  )

  summary_df
}
|
||||
|
||||
# ============================================================================
|
||||
# EXPORT FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
  # Export the per-field analysis in three formats:
  #   - Excel workbook ("Field Data" sheet, plus "Summary" when available)
  #   - RDS bundle (data + metadata, for downstream report scripts)
  #   - CSV of the field data
  #
  # Returns a list with the three output paths (excel, rds, csv).
  message("Exporting per-field analysis to Excel, CSV, and RDS...")

  # Round every numeric column to two decimals for presentation.
  round2 <- function(df) {
    df %>% mutate(across(where(is.numeric), ~ round(., 2)))
  }

  field_df_rounded <- round2(field_df)
  # summary_df may legitimately be NULL (e.g. no summary requested).
  summary_df_rounded <- if (is.null(summary_df)) NULL else round2(summary_df)

  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
  }

  week_tag <- sprintf("%02d_%d", current_week, year)

  excel_path <- normalizePath(
    file.path(output_subdir, paste0(project_dir, "_field_analysis_week", week_tag, ".xlsx")),
    winslash = "\\", mustWork = FALSE
  )

  # The "Summary" sheet is only included when a summary was provided.
  sheets <- list("Field Data" = field_df_rounded)
  if (!is.null(summary_df_rounded)) {
    sheets[["Summary"]] <- summary_df_rounded
  }
  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))

  kpi_data <- list(
    field_analysis = field_df_rounded,
    field_analysis_summary = summary_df_rounded,
    metadata = list(
      current_week = current_week,
      year = year,
      project = project_dir,
      created_at = Sys.time()
    )
  )

  rds_path <- file.path(
    reports_dir, "kpis",
    paste0(project_dir, "_kpi_summary_tables_week", week_tag, ".rds")
  )
  saveRDS(kpi_data, rds_path)
  message(paste("✓ Field analysis RDS exported to:", rds_path))

  csv_path <- file.path(
    output_subdir,
    paste0(project_dir, "_field_analysis_week", week_tag, ".csv")
  )
  write_csv(field_df_rounded, csv_path)
  message(paste("✓ Field analysis CSV exported to:", csv_path))

  list(excel = excel_path, rds = rds_path, csv = csv_path)
}
|
||||
|
||||
# ============================================================================
|
||||
# TILE-BASED KPI EXTRACTION (Alternative calculation method)
|
||||
# ============================================================================
|
||||
|
||||
# [COMMENTED OUT / UNUSED - kept for reference]
|
||||
# These functions provide tile-based extraction as an alternative to field_statistics approach
|
||||
# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R
|
||||
# Uncomment if parallel processing of tiles is needed in future
|
||||
|
||||
# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) {
|
||||
# message("Calculating field-level KPI statistics from tiles...")
|
||||
#
|
||||
# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
|
||||
# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE)
|
||||
#
|
||||
# if (length(tile_files) == 0) {
|
||||
# message("No tiles found for week", week_num, year)
|
||||
# return(NULL)
|
||||
# }
|
||||
#
|
||||
# message(paste("Processing", length(tile_files), "tiles in parallel..."))
|
||||
#
|
||||
# field_kpi_list <- furrr::future_map(
|
||||
# tile_files,
|
||||
# ~ process_single_kpi_tile(
|
||||
# tile_file = .,
|
||||
# field_boundaries_sf = field_boundaries_sf,
|
||||
# tile_grid = tile_grid
|
||||
# ),
|
||||
# .progress = TRUE,
|
||||
# .options = furrr::furrr_options(seed = TRUE)
|
||||
# )
|
||||
#
|
||||
# field_kpi_stats <- dplyr::bind_rows(field_kpi_list)
|
||||
#
|
||||
# if (nrow(field_kpi_stats) == 0) {
|
||||
# message(" No KPI data extracted from tiles")
|
||||
# return(NULL)
|
||||
# }
|
||||
#
|
||||
# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields"))
|
||||
# return(field_kpi_stats)
|
||||
# }
|
||||
|
||||
# process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) {
|
||||
# # Helper function for calculate_field_kpis_from_tiles
|
||||
# tryCatch({
|
||||
# tile_basename <- basename(tile_file)
|
||||
# tile_raster <- terra::rast(tile_file)
|
||||
# ci_band <- tile_raster[[1]]
|
||||
#
|
||||
# field_bbox <- sf::st_bbox(field_boundaries_sf)
|
||||
# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
|
||||
#
|
||||
# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
|
||||
#
|
||||
# tile_results <- data.frame()
|
||||
# tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename))
|
||||
#
|
||||
# for (field_idx in seq_len(nrow(field_boundaries_sf))) {
|
||||
# field_id <- field_boundaries_sf$field[field_idx]
|
||||
# mean_ci <- extracted_vals[field_idx, 2]
|
||||
#
|
||||
# if (is.na(mean_ci)) {
|
||||
# next
|
||||
# }
|
||||
#
|
||||
# tile_results <- rbind(tile_results, data.frame(
|
||||
# field = field_id,
|
||||
# tile_id = tile_id_match,
|
||||
# tile_file = tile_basename,
|
||||
# mean_ci = round(mean_ci, 4),
|
||||
# stringsAsFactors = FALSE
|
||||
# ))
|
||||
# }
|
||||
#
|
||||
# return(tile_results)
|
||||
#
|
||||
# }, error = function(e) {
|
||||
# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message))
|
||||
# return(data.frame())
|
||||
# })
|
||||
# }
|
||||
|
||||
# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf,
|
||||
# harvesting_data, cumulative_CI_vals_dir,
|
||||
# weekly_CI_mosaic, reports_dir, current_week, year,
|
||||
# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") {
|
||||
# # Farm-level KPI calculation using tile-based extraction (alternative approach)
|
||||
# # [Implementation kept as reference for alternative calculation method]
|
||||
# }
|
||||
666
r_app/80_utils_agronomic_support.R
Normal file
666
r_app/80_utils_agronomic_support.R
Normal file
|
|
@ -0,0 +1,666 @@
|
|||
# 80_UTILS_AGRONOMIC_SUPPORT.R
|
||||
# ============================================================================
|
||||
# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support)
|
||||
#
|
||||
# Contains all 6 AURA KPI calculation functions and helpers:
|
||||
# - Field uniformity KPI (CV-based, spatial autocorrelation)
|
||||
# - Area change KPI (week-over-week CI changes)
|
||||
# - TCH forecasted KPI (tonnage projections from harvest data)
|
||||
# - Growth decline KPI (trend analysis)
|
||||
# - Weed presence KPI (field fragmentation detection)
|
||||
# - Gap filling KPI (interpolation quality)
|
||||
# - KPI reporting (summary tables, field details, text interpretation)
|
||||
# - KPI export (Excel, RDS, data export)
|
||||
#
|
||||
# Orchestrator: calculate_all_kpis()
|
||||
# Dependencies: 00_common_utils.R (safe_log), sourced from common
|
||||
# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support")
|
||||
# ============================================================================
|
||||
|
||||
library(terra)
|
||||
library(sf)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(readxl)
|
||||
library(writexl)
|
||||
library(spdep)
|
||||
library(caret)
|
||||
library(CAST)
|
||||
|
||||
# ============================================================================
|
||||
# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R)
|
||||
# ============================================================================
|
||||
# The following helper functions have been moved to 80_utils_common.R:
|
||||
# - calculate_cv()
|
||||
# - calculate_change_percentages()
|
||||
# - calculate_spatial_autocorrelation()
|
||||
# - extract_ci_values()
|
||||
# - calculate_week_numbers()
|
||||
# - load_field_ci_raster()
|
||||
# - load_weekly_ci_mosaic()
|
||||
# - prepare_predictions()
|
||||
#
|
||||
# These are now sourced from common utils and shared by all client types.
|
||||
# ============================================================================
|
||||
|
||||
#' Prepare harvest predictions and ensure proper alignment with field data
#'
#' Applies a scenario multiplier to each field's mean CI to produce a
#' `tch_forecasted` column. Returns NULL when inputs are missing or on error.
#'
#' NOTE(review): the module header above lists prepare_predictions() among the
#' helpers moved to 80_utils_common.R, yet it is (re)defined here — confirm
#' which copy should survive.
prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") {
  if (is.null(harvest_model) || is.null(field_data)) {
    return(NULL)
  }

  tryCatch({
    # Scenario multipliers; unrecognized scenarios fall back to 1.0.
    scenario_factors <- c(pessimistic = 0.85, realistic = 1.0, optimistic = 1.15)
    scenario_factor <- if (scenario %in% names(scenario_factors)) {
      scenario_factors[[scenario]]
    } else {
      1.0
    }

    field_data %>%
      mutate(tch_forecasted = field_data$mean_ci * scenario_factor)
  }, error = function(e) {
    message(paste("Error preparing predictions:", e$message))
    NULL
  })
}
|
||||
|
||||
# ============================================================================
|
||||
# AURA KPI CALCULATION FUNCTIONS (6 KPIS)
|
||||
# ============================================================================
|
||||
|
||||
#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation
#'
#' Measures how uniform crop development is across the field.
#' Low CV + high positive Moran's I = excellent uniformity.
#' The uniformity score is a weighted blend: 70% CV score (CV normalized
#' against a 0.3 ceiling and inverted) and 30% Moran's I (rescaled to 0-1;
#' 0.5 neutral when unavailable).
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param ci_band Raster band with CI values; when NULL, Moran's I is skipped
#'
#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score,
#'   interpretation (one row per field, in field order)
calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) {
  n_fields <- nrow(field_boundaries_sf)

  # Typed empty template: also the return value when there are no fields.
  empty_result <- data.frame(
    field_idx = integer(),
    cv_value = numeric(),
    morans_i = numeric(),
    uniformity_score = numeric(),
    interpretation = character(),
    stringsAsFactors = FALSE
  )
  if (n_fields == 0) {
    return(empty_result)
  }

  # Accumulate per-field rows in a pre-allocated list and bind once at the
  # end: the previous rbind-inside-the-loop pattern was accidentally O(n^2).
  rows <- vector("list", n_fields)

  for (field_idx in seq_len(n_fields)) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]

    # Fields without pixel data get an explicit "No data" row.
    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
      rows[[field_idx]] <- data.frame(
        field_idx = field_idx,
        cv_value = NA_real_,
        morans_i = NA_real_,
        uniformity_score = NA_real_,
        interpretation = "No data",
        stringsAsFactors = FALSE
      )
      next
    }

    cv_val <- calculate_cv(ci_pixels)

    # Moran's I is only computed when a CI raster band is supplied; the
    # helper may return either a bare value or a list with $morans_i.
    morans_i <- NA_real_
    if (!is.null(ci_band)) {
      morans_result <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ])
      morans_i <- if (is.list(morans_result)) morans_result$morans_i else morans_result
    }

    # Normalize CV (0-1 scale, invert so lower CV = higher score);
    # 0.3 is the CV ceiling beyond which the score is 0.
    cv_score <- 1 - min(cv_val / 0.3, 1)

    # Normalize Moran's I from [-1, 1] to [0, 1]; 0.5 is neutral when missing.
    morans_normalized <- if (!is.na(morans_i)) (morans_i + 1) / 2 else 0.5

    uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized

    # Qualitative interpretation from the raw CV value.
    interpretation <- if (is.na(cv_val)) {
      "No data"
    } else if (cv_val < 0.08) {
      "Excellent uniformity"
    } else if (cv_val < 0.15) {
      "Good uniformity"
    } else if (cv_val < 0.25) {
      "Acceptable uniformity"
    } else if (cv_val < 0.4) {
      "Poor uniformity"
    } else {
      "Very poor uniformity"
    }

    rows[[field_idx]] <- data.frame(
      field_idx = field_idx,
      cv_value = cv_val,
      morans_i = morans_i,
      uniformity_score = round(uniformity_score, 3),
      interpretation = interpretation,
      stringsAsFactors = FALSE
    )
  }

  do.call(rbind, rows)
}
|
||||
|
||||
#' KPI 2: Calculate area change metric (week-over-week CI changes)
#'
#' Tracks the percentage change in CI between current and previous week and
#' attaches a human-readable interpretation per field.
#'
#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci)
#' @param previous_stats Previous week field statistics
#'
#' @return Data frame with field-level CI changes plus an interpretation column
calculate_area_change_kpi <- function(current_stats, previous_stats) {
  # Numeric week-over-week deltas come from the shared helper.
  result <- calculate_change_percentages(current_stats, previous_stats)

  # Map a percentage change onto its qualitative label.
  label_change <- function(change) {
    if (is.na(change)) return("No previous data")
    if (change > 15) return("Rapid growth")
    if (change > 5) return("Positive growth")
    if (change > -5) return("Stable")
    if (change > -15) return("Declining")
    "Rapid decline"
  }

  result$interpretation <- vapply(result$mean_ci_pct_change, label_change, character(1))

  result
}
|
||||
|
||||
#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare)
#'
#' Projects harvest tonnage from the current mean CI using a simple linear
#' placeholder model: TCH = 50 + (CI * 10), with a fixed +/-15% confidence
#' band. Production use should replace this with a model fitted on real
#' harvest observations.
#'
#' @param field_statistics Current field statistics (needs field_idx, mean_ci)
#' @param harvesting_data Historical harvest data — currently UNUSED
#'   placeholder, kept for interface stability
#' @param field_boundaries_sf Field geometries — currently UNUSED placeholder,
#'   kept for interface stability
#'
#' @return Data frame with field_idx, mean_ci, tch_forecasted,
#'   tch_lower_bound, tch_upper_bound, confidence. Fields with missing
#'   mean_ci get NA forecasts and confidence "No data"; otherwise "Medium".
calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) {
  mean_ci <- field_statistics$mean_ci
  has_data <- !is.na(mean_ci)

  # Vectorized replacement for the previous per-row loop (same outputs).
  n <- length(mean_ci)
  tch_forecasted <- rep(NA_real_, n)
  tch_lower_bound <- rep(NA_real_, n)
  tch_upper_bound <- rep(NA_real_, n)
  confidence <- rep(NA_character_, n)

  # Simple linear placeholder model with a +/-15% band.
  tch_est <- 50 + (mean_ci[has_data] * 10)
  tch_forecasted[has_data] <- round(tch_est, 2)
  tch_lower_bound[has_data] <- round(tch_est * 0.85, 2)
  tch_upper_bound[has_data] <- round(tch_est * 1.15, 2)
  confidence[has_data] <- "Medium"
  confidence[!has_data] <- "No data"

  data.frame(
    field_idx = field_statistics$field_idx,
    mean_ci = mean_ci,
    tch_forecasted = tch_forecasted,
    tch_lower_bound = tch_lower_bound,
    tch_upper_bound = tch_upper_bound,
    confidence = confidence,
    stringsAsFactors = FALSE
  )
}
|
||||
|
||||
#' KPI 4: Calculate growth decline indicator
#'
#' Identifies fields with a negative growth trajectory by fitting a linear
#' trend to each field's sequence of CI values.
#'
#' @param ci_values_list List of CI values for each field (multiple weeks)
#'
#' @return Data frame with field-level trend slope, interpretation, and
#'   decline severity. Fields with fewer than two valid observations are
#'   marked "Insufficient data".
calculate_growth_decline_kpi <- function(ci_values_list) {
  n_fields <- length(ci_values_list)

  result <- data.frame(
    field_idx = seq_len(n_fields),
    four_week_trend = NA_real_,
    trend_interpretation = NA_character_,
    decline_severity = NA_character_,
    stringsAsFactors = FALSE
  )

  for (idx in seq_along(ci_values_list)) {
    observed <- ci_values_list[[idx]]
    if (!is.null(observed)) {
      observed <- observed[!is.na(observed)]
    }

    # A trend line needs at least two valid observations.
    if (is.null(observed) || length(observed) < 2) {
      result$trend_interpretation[idx] <- "Insufficient data"
      next
    }

    # Linear trend: slope of CI regressed on week index.
    weeks <- seq_along(observed)
    slope <- as.numeric(coef(lm(observed ~ weeks))["weeks"])
    result$four_week_trend[idx] <- round(slope, 3)

    if (slope > 0.1) {
      result$trend_interpretation[idx] <- "Strong growth"
      result$decline_severity[idx] <- "None"
    } else if (slope > 0) {
      result$trend_interpretation[idx] <- "Weak growth"
      result$decline_severity[idx] <- "None"
    } else if (slope > -0.1) {
      result$trend_interpretation[idx] <- "Slight decline"
      result$decline_severity[idx] <- "Low"
    } else if (slope > -0.3) {
      result$trend_interpretation[idx] <- "Moderate decline"
      result$decline_severity[idx] <- "Medium"
    } else {
      result$trend_interpretation[idx] <- "Strong decline"
      result$decline_severity[idx] <- "High"
    }
  }

  return(result)
}
|
||||
|
||||
#' KPI 5: Calculate weed presence indicator
#'
#' Detects field fragmentation/patchiness (potential weed/pest pressure)
#' from the spread and low-CI share of each field's pixel values.
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#'
#' @return Data frame with fragmentation indicators and a categorical
#'   weed-pressure risk ("No data" / "Minimal" / "Low" / "Medium" / "High")
calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
  n_fields <- length(ci_pixels_by_field)

  result <- data.frame(
    field_idx = seq_len(n_fields),
    cv_value = NA_real_,
    low_ci_percent = NA_real_,
    fragmentation_index = NA_real_,
    weed_pressure_risk = NA_character_,
    stringsAsFactors = FALSE
  )

  # Map a fragmentation index onto its categorical risk label.
  risk_label <- function(frag) {
    if (is.na(frag)) return("No data")
    if (frag > 0.15) return("High")
    if (frag > 0.08) return("Medium")
    if (frag > 0.04) return("Low")
    "Minimal"
  }

  for (idx in seq_along(ci_pixels_by_field)) {
    pixels <- ci_pixels_by_field[[idx]]
    if (!is.null(pixels)) {
      pixels <- pixels[!is.na(pixels)]
    }

    if (is.null(pixels) || length(pixels) == 0) {
      result$weed_pressure_risk[idx] <- "No data"
      next
    }

    cv_val <- calculate_cv(pixels)
    # Share of pixels below the low-CI threshold of 1.5.
    low_share <- sum(pixels < 1.5) / length(pixels) * 100
    # Fragmentation combines variability (CV) with the low-CI fraction.
    frag <- cv_val * low_share / 100

    result$cv_value[idx] <- cv_val
    result$low_ci_percent[idx] <- round(low_share, 2)
    result$fragmentation_index[idx] <- round(frag, 3)
    # Classification uses the unrounded index.
    result$weed_pressure_risk[idx] <- risk_label(frag)
  }

  return(result)
}
|
||||
|
||||
#' KPI 6: Calculate gap filling quality (data interpolation success)
#'
#' Measures how well cloud/missing data was interpolated during the growth
#' model stage, based on the per-field NA share in the combined CI matrix.
#'
#' @param ci_rds_path Path to the combined CI RDS file, or a directory that
#'   contains an `All_pivots*.rds` file
#'
#' @return Data frame with gap-filling quality metrics, or NULL when the
#'   path is invalid, no file is found, or reading fails.
calculate_gap_filling_kpi <- function(ci_rds_path) {
  # Reject missing or non-path arguments up front.
  if (is.null(ci_rds_path) || !is.character(ci_rds_path) || length(ci_rds_path) == 0) {
    return(NULL)
  }

  # A directory argument means "find the cumulative CI file inside it".
  if (dir.exists(ci_rds_path)) {
    candidates <- list.files(ci_rds_path, pattern = "^All_pivots.*\\.rds$", full.names = TRUE)
    if (length(candidates) == 0) {
      return(NULL)
    }
    ci_rds_path <- candidates[1]
  }

  if (!file.exists(ci_rds_path)) {
    return(NULL)
  }

  tryCatch({
    # Expected layout: wide matrix of fields (rows) x weeks (columns),
    # where NA marks data missing before interpolation.
    ci_data <- readRDS(ci_rds_path)

    # Map an NA percentage onto its quality label (unrounded input).
    quality_label <- function(pct) {
      if (pct == 0) return("No gaps (100% data)")
      if (pct < 10) return("Excellent")
      if (pct < 25) return("Good")
      if (pct < 40) return("Fair")
      "Poor"
    }

    na_pct <- vapply(
      seq_len(nrow(ci_data)),
      function(row) sum(is.na(ci_data[row, ])) / ncol(ci_data) * 100,
      numeric(1)
    )

    # NOTE(review): post-interpolation NA share is not computed yet —
    # the column is a placeholder, as in the original implementation.
    data.frame(
      field_idx = seq_len(nrow(ci_data)),
      na_percent_pre_interpolation = round(na_pct, 2),
      na_percent_post_interpolation = NA_real_,
      gap_filling_success = vapply(na_pct, quality_label, character(1)),
      stringsAsFactors = FALSE
    )
  }, error = function(e) {
    message(paste("Error calculating gap filling KPI:", e$message))
    return(NULL)
  })
}
|
||||
|
||||
# ============================================================================
|
||||
# KPI ORCHESTRATOR AND REPORTING
|
||||
# ============================================================================
|
||||
|
||||
#' Create summary tables for all 6 KPIs
#'
#' Reduces each KPI result to the columns used in reporting.
#'
#' @param all_kpis List containing results from all 6 KPI functions
#'
#' @return List of summary data frames ready for reporting; the
#'   `gap_filling` element is NULL when that KPI was not computed.
create_summary_tables <- function(all_kpis) {
  # Gap filling may legitimately be absent (KPI 6 returns NULL on failure).
  gap_filling_summary <- NULL
  if (!is.null(all_kpis$gap_filling)) {
    gap_filling_summary <- select(
      all_kpis$gap_filling,
      field_idx, na_percent_pre_interpolation, gap_filling_success
    )
  }

  kpi_summary <- list(
    uniformity = select(
      all_kpis$uniformity,
      field_idx, cv_value, morans_i, uniformity_score, interpretation
    ),
    area_change = select(
      all_kpis$area_change,
      field_idx, mean_ci_pct_change, interpretation
    ),
    tch_forecast = select(
      all_kpis$tch_forecasted,
      field_idx, mean_ci, tch_forecasted, tch_lower_bound, tch_upper_bound, confidence
    ),
    growth_decline = select(
      all_kpis$growth_decline,
      field_idx, four_week_trend, trend_interpretation, decline_severity
    ),
    weed_pressure = select(
      all_kpis$weed_presence,
      field_idx, fragmentation_index, weed_pressure_risk
    ),
    gap_filling = gap_filling_summary
  )

  return(kpi_summary)
}
|
||||
|
||||
#' Create detailed field-by-field KPI report
#'
#' Joins the key column of each KPI result onto the field table, producing
#' one row per field with all KPI columns.
#'
#' @param field_df Data frame with field identifiers and acreage
#' @param all_kpis List with all KPI results
#' @param field_boundaries_sf SF object with field boundaries
#'   (currently unused — kept for interface stability; TODO confirm)
#'
#' @return Data frame with one row per field, all KPI columns
create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) {
  # Per-KPI column subsets to attach; all are keyed on field_idx.
  kpi_columns <- list(
    select(all_kpis$uniformity, field_idx, cv_value, uniformity_interpretation = interpretation),
    select(all_kpis$area_change, field_idx, mean_ci_pct_change),
    select(all_kpis$tch_forecasted, field_idx, tch_forecasted),
    select(all_kpis$growth_decline, field_idx, decline_severity),
    select(all_kpis$weed_presence, field_idx, weed_pressure_risk)
  )

  detail <- field_df
  for (cols in kpi_columns) {
    detail <- left_join(detail, cols, by = c("field_idx"))
  }

  return(detail)
}
|
||||
|
||||
#' Generate KPI text interpretation for inclusion in Word report
#'
#' Builds a short markdown-style summary from the uniformity, growth-trend,
#' and weed-pressure interpretations.
#'
#' @param all_kpis List with all KPI results
#'
#' @return Character string with formatted KPI summary text
create_field_kpi_text <- function(all_kpis) {
  # One "### heading\nvalue; value; ...\n" section per KPI.
  section <- function(title, values) {
    paste0(title, "\n", paste(values, collapse = "; "), "\n")
  }

  paste0(
    "## AURA KPI ANALYSIS SUMMARY\n",
    section("### Field Uniformity", all_kpis$uniformity$interpretation),
    section("### Growth Trends", all_kpis$growth_decline$trend_interpretation),
    section("### Weed/Pest Pressure", all_kpis$weed_presence$weed_pressure_risk)
  )
}
|
||||
|
||||
#' Export detailed KPI data to Excel/RDS
#'
#' Writes one Excel workbook (one sheet per KPI summary) and one RDS file
#' (full KPI results) named `AURA_KPI_week_WW_YYYY.*` into output_dir.
#'
#' @param all_kpis List with all KPI results
#' @param kpi_summary List with summary tables
#' @param output_dir Directory for output files (created if missing)
#' @param week Week number
#' @param year Year
#'
#' @return List of output file paths (elements `excel` and `rds`)
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  # Shared filename stem for both artifacts, e.g. ".../AURA_KPI_week_05_2026".
  file_stem <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year))

  # One sheet per KPI summary table.
  excel_file <- paste0(file_stem, ".xlsx")
  write_xlsx(
    list(
      "Uniformity" = as.data.frame(kpi_summary$uniformity),
      "Area_Change" = as.data.frame(kpi_summary$area_change),
      "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast),
      "Growth_Decline" = as.data.frame(kpi_summary$growth_decline),
      "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure),
      "Gap_Filling" = as.data.frame(kpi_summary$gap_filling)
    ),
    excel_file
  )
  message(paste("✓ AURA KPI data exported to:", excel_file))

  # RDS copy of the raw KPI list for programmatic access.
  rds_file <- paste0(file_stem, ".rds")
  saveRDS(all_kpis, rds_file)
  message(paste("✓ AURA KPI RDS exported to:", rds_file))

  return(list(excel = excel_file, rds = rds_file))
}
|
||||
|
||||
# ============================================================================
|
||||
# ORCHESTRATOR FUNCTION
|
||||
# ============================================================================
|
||||
|
||||
#' Calculate all 6 AURA KPIs
#'
#' Main entry point for AURA KPI calculation.
#' This function orchestrates the 6 KPI calculations and returns all results.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param current_mosaic_dir Directory containing current week's mosaic
#' @param previous_mosaic_dir Directory containing previous week's mosaic (optional)
#' @param ci_rds_path Path to combined CI RDS file
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for KPI exports
#'
#' @return List with results from all 6 KPI functions
#'
#' @details
#' This function:
#' 1. Loads current week mosaic and extracts field statistics
#' 2. (Optionally) loads previous week mosaic for comparison metrics
#' 3. Calculates all 6 AURA KPIs
#' 4. Creates summary tables
#' 5. Exports results to Excel/RDS
#'
calculate_all_kpis <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  current_mosaic_dir,
  previous_mosaic_dir = NULL,
  ci_rds_path = NULL,
  harvesting_data = NULL,
  # NOTE(review): output_dir defaults to NULL but is passed straight to
  # export_kpi_data(), whose dir.exists(output_dir) call would fail on NULL —
  # confirm callers always supply it, or add a guard.
  output_dir = NULL
) {

  message("\n============ AURA KPI CALCULATION (6 KPIs) ============")

  # Load current week mosaic
  message("Loading current week mosaic...")
  # NOTE(review): argument order here (week, year, dir) differs from the call
  # in calculate_field_analysis_cane_supply, which uses (dir, week, year).
  # One of the two must be wrong — verify against load_weekly_ci_mosaic's
  # signature.
  current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)

  # The current mosaic is mandatory; every KPI depends on it.
  if (is.null(current_mosaic)) {
    stop("Could not load current week mosaic")
  }

  # Extract field statistics
  message("Extracting field statistics from current mosaic...")
  current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf)

  # Load previous week mosaic (if available) — needed only for KPI 2.
  previous_stats <- NULL
  if (!is.null(previous_mosaic_dir)) {
    target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
    message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
    previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)

    if (!is.null(previous_mosaic)) {
      previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
    } else {
      message("Previous week mosaic not available - skipping area change KPI")
    }
  }

  # Calculate 6 KPIs
  message("\nCalculating KPI 1: Field Uniformity...")
  uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic)

  message("Calculating KPI 2: Area Change...")
  if (!is.null(previous_stats)) {
    area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats)
  } else {
    # Fallback frame so downstream summary/join code sees the same columns.
    area_change_kpi <- data.frame(
      field_idx = seq_len(nrow(field_boundaries_sf)),
      mean_ci_pct_change = NA_real_,
      interpretation = rep("No previous data", nrow(field_boundaries_sf))
    )
  }

  message("Calculating KPI 3: TCH Forecasted...")
  tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf)

  message("Calculating KPI 4: Growth Decline...")
  # NOTE(review): wrapping ci_pixels_by_field in list() passes the whole
  # per-field list as a single "field" to calculate_growth_decline_kpi,
  # which expects a list of numeric weekly CI vectors — with >= 2 fields the
  # lm() fit would receive a list and fail. The inline comment suggests this
  # is a known placeholder until historical data is wired in; confirm.
  growth_decline_kpi <- calculate_growth_decline_kpi(
    list(ci_pixels_by_field) # Would need historical data for real trend
  )

  message("Calculating KPI 5: Weed Presence...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)

  message("Calculating KPI 6: Gap Filling...")
  # May return NULL (missing/invalid path); create_summary_tables handles it.
  gap_filling_kpi <- calculate_gap_filling_kpi(ci_rds_path)

  # Compile results
  all_kpis <- list(
    uniformity = uniformity_kpi,
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    weed_presence = weed_kpi,
    gap_filling = gap_filling_kpi
  )

  # Create summary tables
  kpi_summary <- create_summary_tables(all_kpis)

  # Export to Excel + RDS
  export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year)

  message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year, "\n"))

  return(all_kpis)
}
|
||||
210
r_app/80_utils_cane_supply.R
Normal file
210
r_app/80_utils_cane_supply.R
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
# 80_UTILS_CANE_SUPPLY.R
|
||||
# ============================================================================
|
||||
# CANE SUPPLY CLIENT-SPECIFIC UTILITIES (SCRIPT 80 - CLIENT TYPE: cane_supply)
|
||||
#
|
||||
# Contains ANGATA and other cane supply-specific KPI and reporting functions.
|
||||
#
|
||||
# Currently, CANE_SUPPLY clients use the common utilities from 80_utils_common.R:
|
||||
# - Weekly statistics (calculate_field_statistics, calculate_kpi_trends)
|
||||
# - Field analysis reporting (generate_field_analysis_summary)
|
||||
# - Excel export (export_field_analysis_excel)
|
||||
#
|
||||
# This file is structured to accommodate future ANGATA-specific functionality such as:
|
||||
# - Custom yield models
|
||||
# - Harvest readiness criteria
|
||||
# - Supply chain integration hooks
|
||||
# - ANGATA-specific alerting and messaging
|
||||
#
|
||||
# Orchestrator: (Placeholder - uses common functions)
|
||||
# Dependencies: 00_common_utils.R, 80_utils_common.R
|
||||
# Used by: 80_calculate_kpis.R (when client_type == "cane_supply")
|
||||
# ============================================================================
|
||||
|
||||
library(terra)
|
||||
library(sf)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(readxl)
|
||||
library(writexl)
|
||||
|
||||
# ============================================================================
|
||||
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
|
||||
# ============================================================================
|
||||
|
||||
#' Placeholder: ANGATA harvest readiness assessment
#'
#' Future implementation will integrate ANGATA-specific harvest readiness
#' criteria: maturation-phase detection (CI threshold-based), field age
#' tracking, weather-based ripeness indicators, and historical yield
#' correlations. The current version classifies on mean CI alone.
#'
#' @param field_ci CI values for the field
#' @param field_age_days Days since planting (currently unused — reserved
#'   for the future age-based criterion)
#'
#' @return Character string with harvest readiness assessment
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
  # Guard: no usable CI observations at all.
  if (is.null(field_ci) || all(is.na(field_ci))) {
    return("No data available")
  }

  avg_ci <- mean(field_ci, na.rm = TRUE)

  # Placeholder thresholds: > 3.5 mature, > 2.5 approaching maturity.
  if (avg_ci > 3.5) {
    "Ready for harvest"
  } else if (avg_ci > 2.5) {
    "Approaching harvest readiness"
  } else {
    "Not ready - continue monitoring"
  }
}
|
||||
|
||||
#' Placeholder: ANGATA supply chain status flags
#'
#' Future implementation will add supply chain-specific status indicators:
#' harvest scheduling readiness, equipment availability impact,
#' transportation/logistics flags, and quality parameter flags — e.g.
#' columns schedule_ready (logical), harvest_window_days (numeric),
#' transportation_priority (character), quality_flags (character).
#'
#' @param field_analysis Data frame with field analysis results
#'
#' @return The input data frame, unchanged (identity placeholder)
assess_supply_chain_status <- function(field_analysis) {
  # Identity pass-through until ANGATA-specific status columns are defined.
  field_analysis
}
|
||||
|
||||
# ============================================================================
|
||||
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
|
||||
# ============================================================================
|
||||
|
||||
#' Orchestrate ANGATA weekly field analysis and reporting
#'
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
#' Currently uses common utilities; future versions will add client-specific logic.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param mosaic_dir Directory containing weekly mosaics
#' @param field_boundaries_path Path to field GeoJSON (currently unused in
#'   the body — presumably reserved for future use; TODO confirm)
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for exports (defaults under global PROJECT_DIR)
#' @param data_dir Base data directory
#'
#' @return List with field analysis results (field_analysis, summary,
#'   exports), or NULL if the mosaic or analysis could not be produced
#'
#' @details
#' This function:
#' 1. Loads weekly mosaic and extracts field statistics
#' 2. Calculates field statistics (using common utilities)
#' 3. Prepares field analysis summary
#' 4. Exports to Excel/CSV/RDS
#' 5. (Future) Applies ANGATA-specific assessments
#'
calculate_field_analysis_cane_supply <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  mosaic_dir,
  field_boundaries_path = NULL,
  harvesting_data = NULL,
  output_dir = file.path(PROJECT_DIR, "output"),
  data_dir = NULL
) {

  message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")

  # Load current week mosaic
  message("Loading current week mosaic...")
  # NOTE(review): argument order here (dir, week, year) differs from the call
  # in calculate_all_kpis, which uses (week, year, dir). One of the two must
  # be wrong — verify against load_weekly_ci_mosaic's signature.
  current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)

  # Missing mosaic is non-fatal here (warn + NULL), unlike calculate_all_kpis
  # which stops — the reporting pipeline tolerates a skipped week.
  if (is.null(current_mosaic)) {
    warning(paste("Could not load current week mosaic for week", current_week, current_year))
    return(NULL)
  }

  # Extract field statistics
  message("Extracting field statistics from current mosaic...")
  field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)

  # Load previous week stats for comparison (best-effort; NULL if absent)
  message("Loading historical data for trends...")
  target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
  previous_stats <- NULL

  previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
  if (!is.null(previous_mosaic)) {
    previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
  }

  # Calculate 4-week historical trend
  message("Calculating field trends...")
  # NOTE(review): data_dir defaults to NULL; file.path(NULL, ...) silently
  # yields the relative path "combined_CI/combined_CI_data.rds" — confirm
  # callers always pass data_dir, or that the relative fallback is intended.
  ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")

  field_analysis <- calculate_field_statistics(
    field_stats = field_stats,
    previous_stats = previous_stats,
    week_num = current_week,
    year = current_year,
    ci_rds_path = ci_rds_path,
    field_boundaries_sf = field_boundaries_sf,
    harvesting_data = harvesting_data
  )

  if (is.null(field_analysis)) {
    message("Could not generate field analysis")
    return(NULL)
  }

  # Generate summary
  message("Generating summary statistics...")
  summary_df <- generate_field_analysis_summary(field_analysis)

  # Export (Excel/CSV/RDS via the common exporter)
  message("Exporting field analysis...")
  export_paths <- export_field_analysis_excel(
    field_analysis,
    summary_df,
    PROJECT_DIR,
    current_week,
    current_year,
    output_dir
  )

  message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))

  result <- list(
    field_analysis = field_analysis,
    summary = summary_df,
    exports = export_paths
  )

  return(result)
}
|
||||
|
||||
# ============================================================================
|
||||
# FUTURE EXTENSION POINTS
|
||||
# ============================================================================
|
||||
|
||||
# Placeholder for ANGATA-specific utilities that may be added in future:
|
||||
# - Custom yield models based on ANGATA historical data
|
||||
# - Field condition thresholds specific to ANGATA growing practices
|
||||
# - Integration with ANGATA harvest scheduling system
|
||||
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
|
||||
# - Cost/benefit analysis for ANGATA operational decisions
|
||||
|
||||
# These functions can be added here as ANGATA requirements evolve.
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -3,7 +3,7 @@ params:
|
|||
ref: "word-styles-reference-var1.docx"
|
||||
output_file: CI_report.docx
|
||||
report_date: "2026-01-22"
|
||||
data_dir: "aura"
|
||||
data_dir: "angata"
|
||||
mail_day: "Wednesday"
|
||||
borders: FALSE
|
||||
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
||||
|
|
@ -38,20 +38,26 @@ reporting_script <- TRUE
|
|||
|
||||
# Load all packages at once with suppressPackageStartupMessages
|
||||
suppressPackageStartupMessages({
|
||||
library(here)
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
library(knitr)
|
||||
library(tidyr)
|
||||
library(flextable)
|
||||
# File path handling
|
||||
library(here) # For relative path resolution (platform-independent file paths)
|
||||
|
||||
# Spatial data handling
|
||||
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||||
library(terra) # For raster operations (reading mosaic TIFFs for visualization)
|
||||
|
||||
# Data manipulation
|
||||
library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization)
|
||||
library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion)
|
||||
library(lubridate) # For date/time operations (week extraction, date formatting)
|
||||
library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling)
|
||||
|
||||
# Visualization
|
||||
library(tmap) # For interactive maps (field boundary visualization)
|
||||
|
||||
# Reporting
|
||||
library(knitr) # For R Markdown document generation (code execution and output)
|
||||
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||
library(officer) # For Word document manipulation (custom formatting, headers, footers)
|
||||
})
|
||||
|
||||
# Load custom utility functions
|
||||
|
|
@ -101,10 +107,14 @@ project_dir <- params$data_dir
|
|||
# Source project parameters with error handling
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "00_common_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading parameters_project.R: ", e$message)
|
||||
stop("Error loading project utilities: ", e$message)
|
||||
})
|
||||
|
||||
# Load centralized paths
|
||||
paths <- setup_project_directories(project_dir)
|
||||
|
||||
# Log initial configuration
|
||||
safe_log("Starting the R Markdown script with KPIs")
|
||||
safe_log(paste("mail_day params:", params$mail_day))
|
||||
|
|
@ -114,15 +124,14 @@ safe_log(paste("mail_day variable:", mail_day))
|
|||
|
||||
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
||||
# Primary expected directory from centralized paths
|
||||
kpi_data_dir <- paths$kpi_reports_dir
|
||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||
|
||||
# Calculate current week from report_date using ISO 8601 week numbering
|
||||
report_date_obj <- as.Date(report_date)
|
||||
current_week <- lubridate::isoweek(report_date_obj)
|
||||
current_iso_year <- lubridate::isoyear(report_date_obj)
|
||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year)
|
||||
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
||||
current_year <- as.numeric(format(as.Date(report_date), "%G"))
|
||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||
|
||||
# Candidate filenames we expect (exact and common variants)
|
||||
expected_summary_names <- c(
|
||||
|
|
@ -346,50 +355,17 @@ safe_log(paste("Week range:", week_start, "to", week_end))
|
|||
```
|
||||
|
||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Load CI index data with error handling
|
||||
# Load CI quadrant data for field-level analysis
|
||||
tryCatch({
|
||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
|
||||
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
safe_log("Successfully loaded CI quadrant data")
|
||||
}, error = function(e) {
|
||||
stop("Error loading CI quadrant data: ", e$message)
|
||||
})
|
||||
|
||||
# Get file paths for different weeks using the utility function
|
||||
tryCatch({
|
||||
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
|
||||
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
|
||||
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
|
||||
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
|
||||
|
||||
# Log the calculated paths
|
||||
safe_log("Required mosaic paths:")
|
||||
safe_log(paste("Path to current week:", path_to_week_current))
|
||||
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
|
||||
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
|
||||
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
|
||||
|
||||
# Validate that files exist
|
||||
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
|
||||
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
|
||||
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
|
||||
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
|
||||
|
||||
# Load raster data with terra functions
|
||||
CI <- terra::rast(path_to_week_current)$CI
|
||||
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
|
||||
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
|
||||
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
|
||||
|
||||
# DEBUG: Check which weeks were actually loaded and their data ranges
|
||||
safe_log(paste("DEBUG - CI (current) range:", paste(terra::minmax(CI)[,1], collapse=" to ")))
|
||||
safe_log(paste("DEBUG - CI_m1 (week-1) range:", paste(terra::minmax(CI_m1)[,1], collapse=" to ")))
|
||||
safe_log(paste("DEBUG - CI_m2 (week-2) range:", paste(terra::minmax(CI_m2)[,1], collapse=" to ")))
|
||||
safe_log(paste("DEBUG - CI_m3 (week-3) range:", paste(terra::minmax(CI_m3)[,1], collapse=" to ")))
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Error loading raster data: ", e$message)
|
||||
})
|
||||
# NOTE: Overview maps skipped for this report
|
||||
# Individual field sections load their own per-field mosaics directly
|
||||
```
|
||||
```
|
||||
|
||||
```{r compute_benchmarks_once, include=FALSE}
|
||||
|
|
@ -450,7 +426,7 @@ if (exists("summary_tables") && !is.null(summary_tables)) {
|
|||
|
||||
## Executive Summary - Key Performance Indicators
|
||||
|
||||
```{r combined_kpi_table, echo=FALSE}
|
||||
```{r combined_kpi_table, echo=FALSE, results='asis'}
|
||||
# Combine all KPI tables into a single table with standardized column names
|
||||
display_names <- c(
|
||||
field_uniformity_summary = "Field Uniformity",
|
||||
|
|
@ -504,7 +480,7 @@ ft
|
|||
|
||||
## Field Alerts
|
||||
|
||||
```{r field_alerts_table, echo=FALSE}
|
||||
```{r field_alerts_table, echo=FALSE, results='asis'}
|
||||
# Generate alerts for all fields
|
||||
generate_field_alerts <- function(field_details_table) {
|
||||
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
|
||||
|
|
@ -610,42 +586,13 @@ if (exists("field_details_table") && !is.null(field_details_table)) {
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
|
||||
# Calculate difference rasters for comparisons
|
||||
# When one week has NA values, the difference will also be NA (not zero)
|
||||
# Initialize placeholders first to ensure they exist
|
||||
last_week_dif_raster_abs <- NULL
|
||||
three_week_dif_raster_abs <- NULL
|
||||
|
||||
tryCatch({
|
||||
# Always calculate differences - NA values will propagate naturally
|
||||
# This way empty weeks (all NA) result in NA differences, not misleading zeros
|
||||
last_week_dif_raster_abs <- (CI - CI_m1)
|
||||
three_week_dif_raster_abs <- (CI - CI_m3)
|
||||
|
||||
safe_log("Calculated difference rasters (NA values preserved)")
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
|
||||
# Fallback: create NA placeholders if calculation fails
|
||||
if (is.null(last_week_dif_raster_abs)) {
|
||||
last_week_dif_raster_abs <- CI * NA
|
||||
}
|
||||
if (is.null(three_week_dif_raster_abs)) {
|
||||
three_week_dif_raster_abs <- CI * NA
|
||||
}
|
||||
})
|
||||
|
||||
# Final safety check - ensure variables exist in global environment
|
||||
if (is.null(last_week_dif_raster_abs)) {
|
||||
last_week_dif_raster_abs <- CI * NA
|
||||
safe_log("Created NA placeholder for last_week_dif_raster_abs", "WARNING")
|
||||
}
|
||||
if (is.null(three_week_dif_raster_abs)) {
|
||||
three_week_dif_raster_abs <- CI * NA
|
||||
safe_log("Created NA placeholder for three_week_dif_raster_abs", "WARNING")
|
||||
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
||||
# Verify CI quadrant data is loaded from load_ci_data chunk
|
||||
if (!exists("CI_quadrant") || is.null(CI_quadrant)) {
|
||||
stop("CI_quadrant data not available - check load_ci_data chunk")
|
||||
}
|
||||
safe_log("CI quadrant data verified for field-level analysis")
|
||||
```
|
||||
```
|
||||
|
||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
|
||||
|
|
@ -665,76 +612,6 @@ tryCatch({
|
|||
stop("Error loading field boundaries: ", e$message)
|
||||
})
|
||||
```
|
||||
\newpage
|
||||
|
||||
## Chlorophyll Index (CI) Overview Map - Current Week
|
||||
```{r render_ci_overview_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE}
|
||||
# Create overview chlorophyll index map
|
||||
tryCatch({
|
||||
# Choose palette based on colorblind_friendly parameter
|
||||
ci_palette <- if (colorblind_friendly) "viridis" else "brewer.rd_yl_gn"
|
||||
|
||||
# Base shape
|
||||
map <- tmap::tm_shape(CI, unit = "m")
|
||||
|
||||
# Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison)
|
||||
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = ci_palette,
|
||||
limits = c(1, 8)),
|
||||
col.legend = tm_legend(title = "Chlorophyll Index (CI)",
|
||||
orientation = "landscape",
|
||||
position = tm_pos_out("center", "bottom")))
|
||||
# Complete the map with layout and other elements
|
||||
map <- map +
|
||||
tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_shape(AllPivots0) +
|
||||
tmap::tm_borders(col = "black") +
|
||||
tmap::tm_text("sub_field", size = 0.6, col = "black")
|
||||
|
||||
# Print the map
|
||||
print(map)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error creating CI overview map:", e$message), "ERROR")
|
||||
plot(1, type="n", axes=FALSE, xlab="", ylab="")
|
||||
text(1, 1, "Error creating CI overview map", cex=1.5)
|
||||
})
|
||||
|
||||
```
|
||||
\newpage
|
||||
|
||||
## Weekly Chlorophyll Index Difference Map
|
||||
```{r render_ci_difference_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE}
|
||||
# Create chlorophyll index difference map
|
||||
tryCatch({
|
||||
# Choose palette based on colorblind_friendly parameter
|
||||
diff_palette <- if (colorblind_friendly) "plasma" else "brewer.rd_yl_gn"
|
||||
|
||||
# Base shape
|
||||
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m")
|
||||
|
||||
# Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale)
|
||||
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = diff_palette,
|
||||
midpoint = 0,
|
||||
limits = c(-3, 3)),
|
||||
col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
|
||||
orientation = "landscape",
|
||||
position = tm_pos_out("center", "bottom")))
|
||||
# Complete the map with layout and other elements
|
||||
map <- map +
|
||||
tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_shape(AllPivots0) +
|
||||
tmap::tm_borders(col = "black") +
|
||||
tmap::tm_text("sub_field", size = 0.6, col = "black")
|
||||
|
||||
# Print the map
|
||||
print(map)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error creating CI difference map:", e$message), "ERROR")
|
||||
plot(1, type="n", axes=FALSE, xlab="", ylab="")
|
||||
text(1, 1, "Error creating CI difference map", cex=1.5)
|
||||
})
|
||||
```
|
||||
|
||||
# Section 2: Field-by-Field Analysis
|
||||
|
||||
|
|
@ -760,6 +637,23 @@ tryCatch({
|
|||
dplyr::group_by(field) %>%
|
||||
dplyr::summarise(.groups = 'drop')
|
||||
|
||||
# Use per-field weekly mosaic directory path from parameters_project.R
|
||||
weekly_mosaic_per_field_dir <- weekly_CI_mosaic
|
||||
|
||||
# Helper to get week/year from a date
|
||||
get_week_year <- function(date) {
|
||||
list(
|
||||
week = as.numeric(format(date, "%V")),
|
||||
year = as.numeric(format(date, "%G"))
|
||||
)
|
||||
}
|
||||
|
||||
# Get week/year for current and historical weeks (local to field section)
|
||||
current_ww <- get_week_year(as.Date(today))
|
||||
minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1))
|
||||
minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2))
|
||||
minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3))
|
||||
|
||||
# Generate plots for each field
|
||||
for(i in seq_along(AllPivots_merged$field)) {
|
||||
field_name <- AllPivots_merged$field[i]
|
||||
|
|
@ -775,15 +669,71 @@ tryCatch({
|
|||
cat("\\newpage\n\n")
|
||||
}
|
||||
|
||||
# Call ci_plot with explicit parameters (ci_plot will generate its own header)
|
||||
# Load per-field mosaics directly for this field
|
||||
field_CI <- NULL
|
||||
field_CI_m1 <- NULL
|
||||
field_CI_m2 <- NULL
|
||||
field_CI_m3 <- NULL
|
||||
|
||||
tryCatch({
|
||||
# Load per-field mosaic for current week
|
||||
per_field_path_current <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) {
|
||||
field_CI <- terra::rast(per_field_path_current)[["CI"]]
|
||||
}
|
||||
|
||||
# Load per-field mosaic for week-1
|
||||
per_field_path_m1 <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) {
|
||||
field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]]
|
||||
}
|
||||
|
||||
# Load per-field mosaic for week-2
|
||||
per_field_path_m2 <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) {
|
||||
field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]]
|
||||
}
|
||||
|
||||
# Load per-field mosaic for week-3
|
||||
per_field_path_m3 <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) {
|
||||
field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]]
|
||||
}
|
||||
|
||||
safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG")
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING")
|
||||
})
|
||||
|
||||
# Calculate difference rasters from per-field data (local to this field)
|
||||
last_week_dif_raster_field <- NULL
|
||||
three_week_dif_raster_field <- NULL
|
||||
|
||||
if (!is.null(field_CI) && !is.null(field_CI_m1)) {
|
||||
last_week_dif_raster_field <- field_CI - field_CI_m1
|
||||
}
|
||||
if (!is.null(field_CI) && !is.null(field_CI_m3)) {
|
||||
three_week_dif_raster_field <- field_CI - field_CI_m3
|
||||
}
|
||||
|
||||
# Call ci_plot with field-specific rasters
|
||||
ci_plot(
|
||||
pivotName = field_name,
|
||||
field_boundaries = AllPivots0,
|
||||
current_ci = CI,
|
||||
ci_minus_1 = CI_m1,
|
||||
ci_minus_2 = CI_m2,
|
||||
last_week_diff = last_week_dif_raster_abs,
|
||||
three_week_diff = three_week_dif_raster_abs,
|
||||
current_ci = field_CI,
|
||||
ci_minus_1 = field_CI_m1,
|
||||
ci_minus_2 = field_CI_m2,
|
||||
last_week_diff = last_week_dif_raster_field,
|
||||
three_week_diff = three_week_dif_raster_field,
|
||||
harvesting_data = harvesting_data,
|
||||
week = week,
|
||||
week_minus_1 = week_minus_1,
|
||||
|
|
@ -882,9 +832,9 @@ tryCatch({
|
|||
|
||||
The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis.
|
||||
|
||||
```{r detailed_field_table, echo=FALSE}
|
||||
```{r detailed_field_table, echo=FALSE, results='asis'}
|
||||
# Load CI quadrant data to get field ages
|
||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
|
||||
# Identify the current season for each field based on report_date
|
||||
# The current season is the one where the report_date falls within or shortly after the season
|
||||
|
|
@ -1027,7 +977,7 @@ Comparing the current season to these lines helps assess whether crop growth is
|
|||
\newpage
|
||||
## Report Metadata
|
||||
|
||||
```{r report_metadata, echo=FALSE}
|
||||
```{r report_metadata, echo=FALSE, results='asis'}
|
||||
metadata_info <- data.frame(
|
||||
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
|
||||
Value = c(
|
||||
|
|
|
|||
|
|
@ -1,584 +0,0 @@
|
|||
---
|
||||
params:
|
||||
ref: "word-styles-reference-var1.docx"
|
||||
output_file: CI_report.docx
|
||||
report_date: "2025-09-30"
|
||||
data_dir: "aura"
|
||||
mail_day: "Wednesday"
|
||||
borders: FALSE
|
||||
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
||||
colorblind_friendly: TRUE # use colorblind-friendly palettes (viridis/plasma)
|
||||
facet_by_season: FALSE # facet CI trend plots by season instead of overlaying
|
||||
x_axis_unit: "days" # x-axis unit for trend plots: "days" or "weeks"
|
||||
output:
|
||||
word_document:
|
||||
reference_docx: !expr file.path("word-styles-reference-var1.docx")
|
||||
toc: no
|
||||
editor_options:
|
||||
chunk_output_type: console
|
||||
---
|
||||
|
||||
```{r setup_parameters, include=FALSE}
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
ci_plot_type <- params$ci_plot_type
|
||||
colorblind_friendly <- params$colorblind_friendly
|
||||
facet_by_season <- params$facet_by_season
|
||||
x_axis_unit <- params$x_axis_unit
|
||||
```
|
||||
|
||||
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
|
||||
# Set flag for reporting scripts to use pivot.geojson instead of pivot_2.geojson
|
||||
reporting_script <- TRUE
|
||||
|
||||
# Load all packages at once with suppressPackageStartupMessages
|
||||
suppressPackageStartupMessages({
|
||||
library(here)
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
library(knitr)
|
||||
library(tidyr)
|
||||
library(flextable)
|
||||
library(officer)
|
||||
})
|
||||
|
||||
# Load custom utility functions
|
||||
tryCatch({
|
||||
source("report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
|
||||
# Function to determine field priority level based on CV and Moran's I
|
||||
# Returns: 1=Urgent, 2=Monitor, 3=No stress
|
||||
get_field_priority_level <- function(cv, morans_i) {
|
||||
# Handle NA values
|
||||
if (is.na(cv) || is.na(morans_i)) return(3) # Default to no stress
|
||||
|
||||
# Determine priority based on thresholds
|
||||
if (cv < 0.1) {
|
||||
if (morans_i < 0.7) {
|
||||
return(3) # No stress
|
||||
} else if (morans_i <= 0.9) {
|
||||
return(2) # Monitor (young field with some clustering)
|
||||
} else {
|
||||
return(1) # Urgent
|
||||
}
|
||||
} else if (cv <= 0.15) {
|
||||
if (morans_i < 0.7) {
|
||||
return(2) # Monitor
|
||||
} else {
|
||||
return(1) # Urgent
|
||||
}
|
||||
} else { # cv > 0.15
|
||||
return(1) # Urgent
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Set the project directory from parameters
|
||||
project_dir <- params$data_dir
|
||||
|
||||
# Source project parameters with error handling
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading parameters_project.R: ", e$message)
|
||||
})
|
||||
|
||||
# Log initial configuration
|
||||
safe_log("Starting the R Markdown script with KPIs - NO TABLES VERSION")
|
||||
safe_log(paste("mail_day params:", params$mail_day))
|
||||
safe_log(paste("report_date params:", params$report_date))
|
||||
safe_log(paste("mail_day variable:", mail_day))
|
||||
```
|
||||
|
||||
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||
|
||||
# Calculate current week from report_date using ISO 8601 week numbering
|
||||
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
||||
current_year <- as.numeric(format(as.Date(report_date), "%G"))
|
||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||
|
||||
# Candidate filenames we expect (exact and common variants)
|
||||
expected_summary_names <- c(
|
||||
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
||||
"kpi_summary_tables.rds",
|
||||
paste0("kpi_summary_tables_", week_suffix, ".rds"),
|
||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
||||
)
|
||||
|
||||
expected_field_details_names <- c(
|
||||
paste0(project_dir, "_field_details_", week_suffix, ".rds"),
|
||||
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_field_details.rds"),
|
||||
"field_details.rds"
|
||||
)
|
||||
|
||||
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
|
||||
try_load_from_dir <- function(dir, candidates) {
|
||||
if (!dir.exists(dir)) return(NULL)
|
||||
for (name in candidates) {
|
||||
f <- file.path(dir, name)
|
||||
if (file.exists(f)) return(f)
|
||||
}
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
# Try primary directory first
|
||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
||||
|
||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
||||
# List rds files under laravel_app/storage/app recursively
|
||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||
# Try to match by expected names
|
||||
if (is.null(summary_file)) {
|
||||
matched <- files[basename(files) %in% expected_summary_names]
|
||||
if (length(matched) > 0) summary_file <- matched[1]
|
||||
}
|
||||
if (is.null(field_details_file)) {
|
||||
matched2 <- files[basename(files) %in% expected_field_details_names]
|
||||
if (length(matched2) > 0) field_details_file <- matched2[1]
|
||||
}
|
||||
}
|
||||
|
||||
# Final checks and load with safe error messages
|
||||
kpi_files_exist <- FALSE
|
||||
if (!is.null(summary_file) && file.exists(summary_file)) {
|
||||
safe_log(paste("Loading KPI summary from:", summary_file))
|
||||
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
|
||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||
} else {
|
||||
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
||||
}
|
||||
|
||||
if (!is.null(field_details_file) && file.exists(field_details_file)) {
|
||||
safe_log(paste("Loading field details from:", field_details_file))
|
||||
field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL })
|
||||
if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE
|
||||
} else {
|
||||
safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
|
||||
}
|
||||
|
||||
if (kpi_files_exist) {
|
||||
safe_log("✓ KPI summary tables loaded successfully")
|
||||
} else {
|
||||
safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
|
||||
}
|
||||
|
||||
#' Generate field-specific KPI summary for display in reports
|
||||
#' @param field_name Name of the field to summarize
|
||||
#' @param field_details_table Data frame with field-level KPI details
|
||||
#' @return Formatted text string with field KPI summary
|
||||
generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadrant) {
|
||||
tryCatch({
|
||||
# Get field age from CI quadrant data for the CURRENT SEASON only
|
||||
# First identify the current season for this field
|
||||
current_season <- CI_quadrant %>%
|
||||
filter(field == field_name, Date <= as.Date(report_date)) %>%
|
||||
group_by(season) %>%
|
||||
summarise(season_end = max(Date), .groups = 'drop') %>%
|
||||
filter(season == max(season)) %>%
|
||||
pull(season)
|
||||
|
||||
# Get the most recent DOY from the current season
|
||||
field_age <- CI_quadrant %>%
|
||||
filter(field == field_name, season == current_season) %>%
|
||||
pull(DOY) %>%
|
||||
max(na.rm = TRUE)
|
||||
|
||||
# Filter data for this specific field
|
||||
field_data <- field_details_table %>%
|
||||
filter(Field == field_name)
|
||||
|
||||
if (nrow(field_data) == 0) {
|
||||
return(paste("**Field", field_name, "KPIs:** Data not available"))
|
||||
}
|
||||
|
||||
# Aggregate sub-field data for field-level summary
|
||||
# For categorical data, take the most common value or highest risk level
|
||||
field_summary <- field_data %>%
|
||||
summarise(
|
||||
field_size = sum(`Field Size (ha)`, na.rm = TRUE),
|
||||
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
|
||||
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
|
||||
max_gap_score = max(`Gap Score`, na.rm = TRUE),
|
||||
highest_decline_risk = case_when(
|
||||
any(`Decline Risk` == "Very-high") ~ "Very-high",
|
||||
any(`Decline Risk` == "High") ~ "High",
|
||||
any(`Decline Risk` == "Moderate") ~ "Moderate",
|
||||
any(`Decline Risk` == "Low") ~ "Low",
|
||||
TRUE ~ "Unknown"
|
||||
),
|
||||
highest_weed_risk = case_when(
|
||||
any(`Weed Risk` == "High") ~ "High",
|
||||
any(`Weed Risk` == "Moderate") ~ "Moderate",
|
||||
any(`Weed Risk` == "Low") ~ "Low",
|
||||
TRUE ~ "Unknown"
|
||||
),
|
||||
avg_mean_ci = mean(`Mean CI`, na.rm = TRUE),
|
||||
avg_cv = mean(`CV Value`, na.rm = TRUE),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
# Apply age-based filtering to yield forecast
|
||||
if (is.na(field_age) || field_age < 240) {
|
||||
field_summary$avg_yield_forecast <- NA_real_
|
||||
}
|
||||
|
||||
# Format the summary text
|
||||
yield_text <- if (is.na(field_summary$avg_yield_forecast)) {
|
||||
"Yield Forecast: NA"
|
||||
} else {
|
||||
paste0("Yield Forecast: ", round(field_summary$avg_yield_forecast, 1), " t/ha")
|
||||
}
|
||||
|
||||
kpi_text <- paste0(
|
||||
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
|
||||
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
|
||||
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
|
||||
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
|
||||
)
|
||||
|
||||
kpi_text <- paste0("<span style='font-size:10pt'>", kpi_text, "</span>")
|
||||
|
||||
return(kpi_text)
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error generating KPI summary for field", field_name, ":", e$message), "ERROR")
|
||||
return(paste("**Field", field_name, "KPIs:** Error generating summary"))
|
||||
})
|
||||
}
|
||||
```
|
||||
|
||||
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Set locale for consistent date formatting
|
||||
Sys.setlocale("LC_TIME", "C")
|
||||
|
||||
# Initialize date variables from parameters
|
||||
today <- as.character(report_date)
|
||||
mail_day_as_character <- as.character(mail_day)
|
||||
|
||||
# Calculate report dates and weeks using ISO 8601 week numbering
|
||||
report_date_obj <- as.Date(today)
|
||||
current_week <- as.numeric(format(report_date_obj, "%V"))
|
||||
year <- as.numeric(format(report_date_obj, "%Y"))
|
||||
|
||||
# Calculate dates for weekly analysis
|
||||
week_start <- report_date_obj - ((as.numeric(format(report_date_obj, "%w")) + 1) %% 7)
|
||||
week_end <- week_start + 6
|
||||
|
||||
# Calculate week days (copied from 05 script for compatibility)
|
||||
report_date_as_week_day <- weekdays(lubridate::ymd(today))
|
||||
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
|
||||
|
||||
# Calculate initial week number
|
||||
week <- lubridate::week(today)
|
||||
safe_log(paste("Initial week calculation:", week, "today:", today))
|
||||
|
||||
# Calculate previous dates for comparisons
|
||||
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
|
||||
|
||||
# Adjust week calculation based on mail day
|
||||
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
|
||||
safe_log("Adjusting weeks because of mail day")
|
||||
week <- lubridate::week(today) + 1
|
||||
today_minus_1 <- as.character(lubridate::ymd(today))
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
|
||||
}
|
||||
|
||||
# Calculate week numbers for previous weeks
|
||||
week_minus_1 <- week - 1
|
||||
week_minus_2 <- week - 2
|
||||
week_minus_3 <- week - 3
|
||||
|
||||
# Format current week with leading zeros
|
||||
week <- sprintf("%02d", week)
|
||||
|
||||
safe_log(paste("Report week:", current_week, "Year:", year))
|
||||
safe_log(paste("Week range:", week_start, "to", week_end))
|
||||
```
|
||||
|
||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Load CI quadrant data for field-level analysis
|
||||
tryCatch({
|
||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
safe_log("Successfully loaded CI quadrant data")
|
||||
}, error = function(e) {
|
||||
stop("Error loading CI quadrant data: ", e$message)
|
||||
})
|
||||
|
||||
# NOTE: Overview maps skipped for this report
|
||||
# Individual field sections load their own per-field mosaics directly
|
||||
```
|
||||
```
|
||||
|
||||
```{r compute_benchmarks_once, include=FALSE}
|
||||
# Compute CI benchmarks once for the entire estate
|
||||
benchmarks <- compute_ci_benchmarks(CI_quadrant, project_dir, c(10, 50, 90))
|
||||
if (!is.null(benchmarks)) {
|
||||
safe_log("Benchmarks computed successfully for the report")
|
||||
} else {
|
||||
safe_log("Failed to compute benchmarks", "WARNING")
|
||||
}
|
||||
```
|
||||
|
||||
## Report Summary
|
||||
|
||||
**Farm Location:** `r toupper(project_dir)` Estate
|
||||
**Report Period:** Week `r current_week` of `r year`
|
||||
**Data Source:** Planet Labs Satellite Imagery
|
||||
**Analysis Type:** Chlorophyll Index (CI) Monitoring
|
||||
**Report Generated on:** `r format(Sys.time(), "%B %d, %Y at %H:%M")`
|
||||
|
||||
**NOTE: THIS IS A NO-TABLES VERSION FOR DIAGNOSTIC PURPOSES - MAPS AND GRAPHS ONLY**
|
||||
|
||||
\newpage
|
||||
|
||||
# Section 2: Field-by-Field Analysis
|
||||
|
||||
## Overview of Field-Level Insights
|
||||
This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions.
|
||||
|
||||
**Key Elements per Field:**
|
||||
- Current and historical CI maps
|
||||
- Week-over-week change visualizations
|
||||
- Cumulative growth trends
|
||||
- Field-specific KPI summaries
|
||||
|
||||
*Navigate to the following pages for individual field reports.*
|
||||
|
||||
\newpage
|
||||
|
||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
|
||||
# Load field boundaries from parameters
|
||||
tryCatch({
|
||||
AllPivots0 <- field_boundaries_sf %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
|
||||
safe_log("Successfully loaded field boundaries")
|
||||
|
||||
# Prepare merged field list for use in summaries
|
||||
AllPivots_merged <- AllPivots0 %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::summarise(.groups = 'drop')
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Error loading field boundaries: ", e$message)
|
||||
})
|
||||
```
|
||||
|
||||
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, dpi=300, dev='png', message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
|
||||
# Generate detailed visualizations for each field
|
||||
tryCatch({
|
||||
# Merge field polygons for processing and filter out NA field names
|
||||
AllPivots_merged <- AllPivots0 %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA fields
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::summarise(.groups = 'drop')
|
||||
|
||||
# Use per-field weekly mosaic directory path from parameters_project.R
|
||||
weekly_mosaic_per_field_dir <- weekly_CI_mosaic
|
||||
|
||||
# Helper to get week/year from a date
|
||||
get_week_year <- function(date) {
|
||||
list(
|
||||
week = as.numeric(format(date, "%V")),
|
||||
year = as.numeric(format(date, "%G"))
|
||||
)
|
||||
}
|
||||
|
||||
# Get week/year for current and historical weeks (local to field section)
|
||||
current_ww <- get_week_year(as.Date(today))
|
||||
minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1))
|
||||
minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2))
|
||||
minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3))
|
||||
|
||||
# Debug: check how many fields we're iterating
|
||||
safe_log(paste("Starting visualization loop for", nrow(AllPivots_merged), "fields"), "DEBUG")
|
||||
safe_log(paste("Fields to process:", paste(AllPivots_merged$field, collapse=", ")), "DEBUG")
|
||||
|
||||
# Generate plots for each field
|
||||
for(i in seq_along(AllPivots_merged$field)) {
|
||||
field_name <- AllPivots_merged$field[i]
|
||||
safe_log(paste("Processing field", i, "of", nrow(AllPivots_merged), ":", field_name), "DEBUG")
|
||||
|
||||
# Skip if field_name is still NA (double check)
|
||||
if(is.na(field_name)) {
|
||||
safe_log(paste("Skipping field", i, "- NA name"), "DEBUG")
|
||||
next
|
||||
}
|
||||
|
||||
tryCatch({
|
||||
# Add page break before each field (except the first one)
|
||||
if(i > 1) {
|
||||
cat("\\newpage\n\n")
|
||||
}
|
||||
|
||||
# Load per-field mosaics directly for this field
|
||||
field_CI <- NULL
|
||||
field_CI_m1 <- NULL
|
||||
field_CI_m2 <- NULL
|
||||
field_CI_m3 <- NULL
|
||||
|
||||
tryCatch({
|
||||
# Load per-field mosaic for current week
|
||||
per_field_path_current <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year
|
||||
)
|
||||
safe_log(paste("Looking for mosaic at:", per_field_path_current, "exists?", file.exists(per_field_path_current %||% "")), "DEBUG")
|
||||
if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) {
|
||||
field_CI <- terra::rast(per_field_path_current)[["CI"]]
|
||||
safe_log(paste("Successfully loaded field_CI for", field_name), "DEBUG")
|
||||
} else {
|
||||
safe_log(paste("Could not load field_CI for", field_name, "- file not found"), "DEBUG")
|
||||
}
|
||||
|
||||
# Load per-field mosaic for week-1
|
||||
per_field_path_m1 <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) {
|
||||
field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]]
|
||||
}
|
||||
|
||||
# Load per-field mosaic for week-2
|
||||
per_field_path_m2 <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) {
|
||||
field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]]
|
||||
}
|
||||
|
||||
# Load per-field mosaic for week-3
|
||||
per_field_path_m3 <- get_per_field_mosaic_path(
|
||||
weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year
|
||||
)
|
||||
if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) {
|
||||
field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]]
|
||||
}
|
||||
|
||||
safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG")
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING")
|
||||
})
|
||||
|
||||
# Calculate difference rasters from per-field data (local to this field)
|
||||
last_week_dif_raster_field <- NULL
|
||||
three_week_dif_raster_field <- NULL
|
||||
|
||||
if (!is.null(field_CI) && !is.null(field_CI_m1)) {
|
||||
last_week_dif_raster_field <- field_CI - field_CI_m1
|
||||
}
|
||||
if (!is.null(field_CI) && !is.null(field_CI_m3)) {
|
||||
three_week_dif_raster_field <- field_CI - field_CI_m3
|
||||
}
|
||||
|
||||
# Call ci_plot with field-specific rasters
|
||||
ci_plot(
|
||||
pivotName = field_name,
|
||||
field_boundaries = AllPivots0,
|
||||
current_ci = field_CI,
|
||||
ci_minus_1 = field_CI_m1,
|
||||
ci_minus_2 = field_CI_m2,
|
||||
last_week_diff = last_week_dif_raster_field,
|
||||
three_week_diff = three_week_dif_raster_field,
|
||||
harvesting_data = harvesting_data,
|
||||
week = week,
|
||||
week_minus_1 = week_minus_1,
|
||||
week_minus_2 = week_minus_2,
|
||||
week_minus_3 = week_minus_3,
|
||||
borders = borders,
|
||||
colorblind_friendly = colorblind_friendly
|
||||
)
|
||||
|
||||
cat("\n\n")
|
||||
|
||||
# Special handling for ESA project field 00f25 - remove duplicate DOY values
|
||||
if (project_dir == "esa" && field_name == "00F25") {
|
||||
ci_quadrant_data <- CI_quadrant %>%
|
||||
filter(field == "00F25") %>%
|
||||
arrange(DOY) %>%
|
||||
group_by(DOY) %>%
|
||||
slice(1) %>%
|
||||
ungroup()
|
||||
} else {
|
||||
ci_quadrant_data <- CI_quadrant
|
||||
}
|
||||
|
||||
# Call cum_ci_plot with explicit parameters
|
||||
cum_ci_plot(
|
||||
pivotName = field_name,
|
||||
ci_quadrant_data = ci_quadrant_data,
|
||||
plot_type = ci_plot_type,
|
||||
facet_on = facet_by_season,
|
||||
x_unit = x_axis_unit,
|
||||
colorblind_friendly = colorblind_friendly,
|
||||
show_benchmarks = TRUE,
|
||||
estate_name = project_dir,
|
||||
benchmark_percentiles = c(10, 50, 90),
|
||||
benchmark_data = benchmarks
|
||||
)
|
||||
|
||||
cat("\n\n")
|
||||
|
||||
# Add field-specific KPI summary under the graphs
|
||||
if (exists("field_details_table") && !is.null(field_details_table)) {
|
||||
kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant)
|
||||
cat(kpi_summary)
|
||||
cat("\n\n")
|
||||
}
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error generating plots for field", field_name, ":", e$message), "ERROR")
|
||||
cat("\\newpage\n\n")
|
||||
cat("# Error generating plots for field ", field_name, "\n\n")
|
||||
cat(e$message, "\n\n")
|
||||
})
|
||||
}
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error in field visualization section:", e$message), "ERROR")
|
||||
cat("Error generating field plots. See log for details.\n\n")
|
||||
})
|
||||
```
|
||||
|
||||
\newpage
|
||||
|
||||
# END OF NO-TABLES DIAGNOSTIC REPORT
|
||||
|
||||
This diagnostic report contains only maps and graphs to help identify if the visualization system is working correctly.
|
||||
|
||||
*Generated for diagnostic purposes*
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
params:
|
||||
ref: "word-styles-reference-var1.docx"
|
||||
output_file: CI_report.docx
|
||||
report_date: "2026-01-25"
|
||||
report_date: "2025-09-30"
|
||||
data_dir: "angata"
|
||||
mail_day: "Wednesday"
|
||||
borders: FALSE
|
||||
|
|
@ -38,20 +38,25 @@ reporting_script <- TRUE
|
|||
|
||||
# Load all packages at once with suppressPackageStartupMessages
|
||||
suppressPackageStartupMessages({
|
||||
library(here)
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(tidyverse)
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
library(knitr)
|
||||
library(tidyr)
|
||||
library(flextable)
|
||||
# File path handling
|
||||
library(here) # For relative path resolution (platform-independent file paths)
|
||||
|
||||
# Spatial data handling
|
||||
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||||
library(terra) # For raster operations (reading mosaic TIFFs for visualization)
|
||||
|
||||
# Data manipulation
|
||||
library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization)
|
||||
library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion)
|
||||
library(lubridate) # For date/time operations (week extraction, date formatting)
|
||||
library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling)
|
||||
|
||||
# Visualization
|
||||
library(tmap) # For interactive maps (field boundary visualization)
|
||||
|
||||
# Reporting
|
||||
library(knitr) # For R Markdown document generation (code execution and output)
|
||||
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||
})
|
||||
|
||||
# Load custom utility functions
|
||||
|
|
@ -105,6 +110,16 @@ tryCatch({
|
|||
stop("Error loading parameters_project.R: ", e$message)
|
||||
})
|
||||
|
||||
# Source common utilities for logging and helper functions
|
||||
tryCatch({
|
||||
source(here::here("r_app", "00_common_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading 00_common_utils.R: ", e$message)
|
||||
})
|
||||
|
||||
# Load centralized paths
|
||||
paths <- setup_project_directories(project_dir)
|
||||
|
||||
# Log initial configuration
|
||||
safe_log("Starting the R Markdown script with KPIs")
|
||||
safe_log(paste("mail_day params:", params$mail_day))
|
||||
|
|
@ -112,16 +127,23 @@ safe_log(paste("report_date params:", params$report_date))
|
|||
safe_log(paste("mail_day variable:", mail_day))
|
||||
```
|
||||
|
||||
```{r load_kpi_data, eval=TRUE, message=FALSE, warning=FALSE, include=FALSE}
|
||||
```{r load_kpi_data, message=FALSE, warning=FALSE}
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
||||
|
||||
# First, show working directory for debugging
|
||||
cat("\n=== DEBUG: R Markdown Working Directory ===\n")
|
||||
cat(paste("getwd():", getwd(), "\n"))
|
||||
cat(paste("Expected knit_dir from R Markdown:", knitr::opts_knit$get("root.dir"), "\n\n"))
|
||||
|
||||
# Primary expected directory from centralized paths
|
||||
kpi_data_dir <- paths$kpi_reports_dir
|
||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||
|
||||
# Calculate current week from report_date using ISO 8601 week numbering
|
||||
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
||||
current_iso_year <- as.numeric(format(as.Date(report_date), "%G"))
|
||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year)
|
||||
current_year <- as.numeric(format(as.Date(report_date), "%G"))
|
||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||
|
||||
# Candidate filenames we expect (exact and common variants)
|
||||
expected_summary_names <- c(
|
||||
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
||||
|
|
@ -171,30 +193,69 @@ if (is.null(summary_file) || is.null(field_details_file)) {
|
|||
|
||||
# Final checks and load with safe error messages
|
||||
kpi_files_exist <- FALSE
|
||||
|
||||
# Debug: log what we're looking for
|
||||
cat("\n=== KPI LOADING DEBUG ===\n")
|
||||
cat(paste("Working directory:", getwd(), "\n"))
|
||||
cat(paste("project_dir:", project_dir, "\n"))
|
||||
cat(paste("report_date:", report_date, "\n"))
|
||||
cat(paste("Calculated week:", current_week, "year:", current_year, "\n"))
|
||||
cat(paste("Looking for KPI files in:", kpi_data_dir, "\n"))
|
||||
cat(paste("Directory exists:", dir.exists(kpi_data_dir), "\n"))
|
||||
cat(paste("Expected filenames to match:\n"))
|
||||
for (name in expected_summary_names) cat(paste(" -", name, "\n"))
|
||||
|
||||
# List what's actually in the directory
|
||||
if (dir.exists(kpi_data_dir)) {
|
||||
actual_files <- list.files(kpi_data_dir, pattern = ".*\\.rds$", full.names = FALSE)
|
||||
cat(paste("Files in KPI directory (", length(actual_files), " total):\n"))
|
||||
for (f in actual_files) cat(paste(" -", f, "\n"))
|
||||
} else {
|
||||
cat("KPI directory does NOT exist!\n")
|
||||
}
|
||||
|
||||
if (!is.null(summary_file) && file.exists(summary_file)) {
|
||||
safe_log(paste("Loading KPI summary from:", summary_file))
|
||||
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
|
||||
cat(paste("✓ FOUND summary file:", summary_file, "\n"))
|
||||
cat(paste(" File size:", file.size(summary_file), "bytes\n"))
|
||||
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
|
||||
|
||||
if (!is.null(summary_data)) {
|
||||
cat(paste(" ✓ Loaded successfully. Class:", class(summary_data), "\n"))
|
||||
if (is.list(summary_data)) {
|
||||
cat(paste(" List names:", paste(names(summary_data), collapse = ", "), "\n"))
|
||||
}
|
||||
|
||||
# Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format
|
||||
if (!is.null(summary_data)) {
|
||||
if (is.list(summary_data) && !is.data.frame(summary_data)) {
|
||||
# New format from 09_field_analysis_weekly.R - just pass it through
|
||||
if ("field_analysis_summary" %in% names(summary_data)) {
|
||||
cat(" ✓ Found field_analysis_summary in list - will use this structure\n")
|
||||
# Keep the new structure intact - combined_kpi_table will use it directly
|
||||
kpi_files_exist <- TRUE
|
||||
} else {
|
||||
cat(" ! Old format detected\n")
|
||||
# Old format - keep as is
|
||||
summary_tables <- summary_data
|
||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||
}
|
||||
} else {
|
||||
cat(" ! Data frame format\n")
|
||||
# Data frame format or direct tables
|
||||
summary_tables <- summary_data
|
||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||
}
|
||||
} else {
|
||||
cat(" ✗ Failed to load RDS - summary_data is NULL\n")
|
||||
}
|
||||
} else {
|
||||
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
||||
safe_log(paste("Attempted directory:", kpi_data_dir), "WARNING")
|
||||
# Try searching the entire workspace as fallback
|
||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "kpi.*\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||
safe_log(paste("Found", length(files), "KPI RDS files in workspace"), "INFO")
|
||||
if (length(files) > 0) {
|
||||
safe_log(paste("Available files:", paste(basename(files), collapse = ", ")), "INFO")
|
||||
}
|
||||
}
|
||||
|
||||
if (!is.null(field_details_file) && file.exists(field_details_file)) {
|
||||
|
|
@ -427,7 +488,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
|
|||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE, eval=TRUE}
|
||||
# Load CI index data with error handling
|
||||
tryCatch({
|
||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
|
||||
safe_log("Successfully loaded CI quadrant data")
|
||||
}, error = function(e) {
|
||||
|
|
@ -683,20 +744,77 @@ tryCatch({
|
|||
\newpage
|
||||
## 1.2 Key Performance Indicators
|
||||
|
||||
```{r combined_kpi_table, echo=FALSE, eval=TRUE}
|
||||
```{r combined_kpi_table, echo=TRUE}
|
||||
# Debug: check what variables exist
|
||||
cat("\n=== DEBUG: combined_kpi_table chunk ===\n")
|
||||
cat(paste("exists('summary_data'):", exists("summary_data"), "\n"))
|
||||
cat(paste("exists('kpi_files_exist'):", exists("kpi_files_exist"), "\n"))
|
||||
if (exists("kpi_files_exist")) {
|
||||
cat(paste("kpi_files_exist value:", kpi_files_exist, "\n"))
|
||||
}
|
||||
if (exists("summary_data")) {
|
||||
cat(paste("summary_data class:", class(summary_data), "\n"))
|
||||
if (is.list(summary_data)) {
|
||||
cat(paste("summary_data names:", paste(names(summary_data), collapse = ", "), "\n"))
|
||||
cat(paste("has field_analysis_summary:", "field_analysis_summary" %in% names(summary_data), "\n"))
|
||||
}
|
||||
} else {
|
||||
cat("summary_data DOES NOT EXIST in this chunk's environment!\n")
|
||||
}
|
||||
cat("\n")
|
||||
|
||||
# Create summary KPI table from field_analysis_summary data
|
||||
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
|
||||
|
||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary" %in% names(summary_data)) {
|
||||
field_analysis_summary <- summary_data$field_analysis_summary
|
||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||
# Load field analysis data
|
||||
field_analysis_df <- summary_data$field_analysis
|
||||
|
||||
# If field_analysis_summary is NULL or doesn't exist, create it from field_analysis_df
|
||||
if (is.null(summary_data$field_analysis_summary) || !("field_analysis_summary" %in% names(summary_data)) ||
|
||||
!is.data.frame(summary_data$field_analysis_summary)) {
|
||||
cat("\nNote: field_analysis_summary not in RDS, creating from field_analysis...\n")
|
||||
|
||||
# Create summary by aggregating by Status_Alert and Phase categories
|
||||
# This groups fields by their phase and status to show distribution
|
||||
phase_summary <- field_analysis_df %>%
|
||||
filter(!is.na(Phase)) %>%
|
||||
group_by(Phase) %>%
|
||||
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
||||
mutate(Category = Phase) %>%
|
||||
select(Category, Acreage)
|
||||
|
||||
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
|
||||
trigger_summary <- tryCatch({
|
||||
field_analysis_df %>%
|
||||
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
||||
group_by(Status_Alert) %>%
|
||||
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
||||
mutate(Category = Status_Alert) %>%
|
||||
select(Category, Acreage)
|
||||
}, error = function(e) {
|
||||
cat("Could not create trigger summary:", e$message, "\n")
|
||||
data.frame(Category = character(), Acreage = numeric())
|
||||
})
|
||||
|
||||
# Combine into summary
|
||||
field_analysis_summary <- bind_rows(phase_summary, trigger_summary)
|
||||
|
||||
cat(paste("Created summary with", nrow(field_analysis_summary), "category rows\n"))
|
||||
|
||||
} else {
|
||||
# Use existing summary from RDS
|
||||
field_analysis_summary <- summary_data$field_analysis_summary
|
||||
}
|
||||
|
||||
# Phase names and trigger names to extract from summary
|
||||
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
||||
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
|
||||
"Germination Complete", "Germination Started", "No Active Trigger")
|
||||
"Germination Complete", "Germination Started", "No Active Trigger",
|
||||
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
|
||||
|
||||
# Extract phase distribution - match on category names directly
|
||||
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
|
||||
phase_rows <- field_analysis_summary %>%
|
||||
filter(Category %in% phase_names) %>%
|
||||
select(Category, Acreage) %>%
|
||||
|
|
@ -802,6 +920,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary"
|
|||
}
|
||||
|
||||
ft
|
||||
} else {
|
||||
cat("KPI summary data available but is empty/invalid.\n")
|
||||
}
|
||||
} else {
|
||||
cat("KPI summary data not available.\n")
|
||||
}
|
||||
|
|
@ -854,11 +975,11 @@ if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
|
|||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||
field_analysis_table <- summary_data$field_analysis
|
||||
|
||||
# Extract fields with status triggers (non-null)
|
||||
# Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
|
||||
alerts_data <- field_analysis_table %>%
|
||||
filter(!is.na(Status_trigger), Status_trigger != "") %>%
|
||||
select(Field_id, Status_trigger) %>%
|
||||
rename(Field = Field_id, Alert = Status_trigger)
|
||||
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
||||
select(Field_id, Status_Alert) %>%
|
||||
rename(Field = Field_id, Alert = Status_Alert)
|
||||
|
||||
if (nrow(alerts_data) > 0) {
|
||||
# Format alert messages for display
|
||||
|
|
@ -892,22 +1013,50 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
|||
# The report renders KPI tables and field summaries from that data
|
||||
```
|
||||
|
||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE, eval=TRUE}
|
||||
# Load field boundaries from parameters
|
||||
field_boundaries_sf <- sf::st_make_valid(field_boundaries_sf)
|
||||
tryCatch({
|
||||
AllPivots0 <- field_boundaries_sf %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
|
||||
safe_log("Successfully loaded field boundaries")
|
||||
```{r load_field_boundaries, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Load field boundaries from parameters (with fallback if geometry is invalid)
|
||||
field_boundaries_loaded <- FALSE
|
||||
|
||||
# Prepare merged field list for use in summaries
|
||||
tryCatch({
|
||||
# Try to load and validate the field boundaries
|
||||
if (exists("field_boundaries_sf") && !is.null(field_boundaries_sf)) {
|
||||
# Try to filter - this will trigger geometry validation
|
||||
AllPivots0 <- field_boundaries_sf %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field))
|
||||
|
||||
# If successful, also create merged field list
|
||||
AllPivots_merged <- AllPivots0 %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::summarise(.groups = 'drop')
|
||||
|
||||
field_boundaries_loaded <- TRUE
|
||||
safe_log("✓ Successfully loaded field boundaries")
|
||||
} else {
|
||||
safe_log("⚠ field_boundaries_sf not found in environment")
|
||||
}
|
||||
}, error = function(e) {
|
||||
stop("Error loading field boundaries: ", e$message)
|
||||
# If geometry is invalid, try to fix or skip
|
||||
safe_log(paste("⚠ Error loading field boundaries:", e$message), "WARNING")
|
||||
safe_log("Attempting to fix invalid geometries using st_make_valid()...", "WARNING")
|
||||
|
||||
tryCatch({
|
||||
# Try to repair invalid geometries
|
||||
field_boundaries_sf_fixed <<- sf::st_make_valid(field_boundaries_sf)
|
||||
AllPivots0 <<- field_boundaries_sf_fixed %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field))
|
||||
|
||||
AllPivots_merged <<- AllPivots0 %>%
|
||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::summarise(.groups = 'drop')
|
||||
|
||||
field_boundaries_loaded <<- TRUE
|
||||
safe_log("✓ Fixed invalid geometries and loaded field boundaries")
|
||||
}, error = function(e2) {
|
||||
safe_log(paste("⚠ Could not repair geometries:", e2$message), "WARNING")
|
||||
safe_log("Continuing without field boundary data", "WARNING")
|
||||
})
|
||||
})
|
||||
```
|
||||
\newpage
|
||||
|
|
|
|||
631
r_app/MANUAL_PIPELINE_RUNNER.R
Normal file
631
r_app/MANUAL_PIPELINE_RUNNER.R
Normal file
|
|
@ -0,0 +1,631 @@
|
|||
# ==============================================================================
|
||||
# SMARTCANE MANUAL PIPELINE RUNNER
|
||||
# ==============================================================================
|
||||
#
|
||||
# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS.
|
||||
# Do NOT run this script directly - instead, copy individual commands and
|
||||
# paste them into your PowerShell terminal.
|
||||
#
|
||||
# This approach allows you to:
|
||||
# - Run steps one at a time and inspect outputs
|
||||
# - Re-run failed steps without re-running successful ones
|
||||
# - Monitor progress between steps
|
||||
# - Troubleshoot issues more easily than with automated pipeline
|
||||
#
|
||||
# ==============================================================================
|
||||
# PIPELINE SEQUENCE (IN ORDER)
|
||||
# ==============================================================================
|
||||
#
|
||||
# 1. Python: Download Planet satellite imagery (optional - only if new data needed)
|
||||
# 2. R10: Split farm TIFFs into per-field directory structure
|
||||
# 3. R20: Extract Canopy Index (CI) from 4-band imagery
|
||||
# 4. R30: Interpolate growth model (smooth CI time series)
|
||||
# 5. R21: Convert CI data to CSV format for Python
|
||||
# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx)
|
||||
# 7. R40: Create weekly mosaic TIFFs
|
||||
# 8. R80: Calculate KPIs (field uniformity, trends, stress)
|
||||
# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply)
|
||||
#
|
||||
# ==============================================================================
|
||||
# BEFORE YOU START
|
||||
# ==============================================================================
|
||||
#
|
||||
# 1. Open PowerShell in the smartcane root directory:
|
||||
# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\
|
||||
#
|
||||
# 2. Define your parameters ONCE at the top of the session:
|
||||
#
|
||||
# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba
|
||||
# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04)
|
||||
# $OFFSET = 7 # Days to look back (e.g., 7 for one week)
|
||||
# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE
|
||||
# $YEAR = 2026 # ISO year - auto-calculated from END_DATE
|
||||
#
|
||||
# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc.
|
||||
#
|
||||
# ==============================================================================
|
||||
# COMMAND REFERENCE
|
||||
# ==============================================================================
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL)
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API
|
||||
# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif
|
||||
#
|
||||
# WHEN TO RUN:
|
||||
# - Only needed if you have new dates to process
|
||||
# - Pipeline skips dates already in merged_tif/ or field_tiles/
|
||||
# - First-time setup: download for your date range
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
# DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||||
# RESOLUTION: 3 meters (default) - can also use 5, 10
|
||||
# --cleanup: Delete intermediate files after download
|
||||
# --clear-all: Clear all output folders before downloading
|
||||
#
|
||||
# COMMAND #1 - Single Date Download:
|
||||
#
|
||||
# cd python_app
|
||||
# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup
|
||||
#
|
||||
# Example:
|
||||
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
|
||||
#
|
||||
# COMMAND #2 - Batch Download (Multiple Dates):
|
||||
# For date ranges, MUST use download_planet_missing_dates.py (not Script 00)
|
||||
#
|
||||
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
|
||||
#
|
||||
# Example:
|
||||
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
|
||||
#
|
||||
# IMPORTANT DISTINCTION:
|
||||
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
|
||||
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
|
||||
# Script 00 does NOT have --start/--end flags despite documentation suggestion
|
||||
# Use the correct script for your use case!
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
|
||||
#
|
||||
# Note: Planet API requires authentication (PLANET_API_KEY environment variable)
|
||||
# Cost: ~1,500-2,000 PU per date
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 1: R10 - Create Per-Field TIFF Structure
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Split farm-wide GeoTIFFs into per-field directory structure.
|
||||
# Transforms: merged_tif/{DATE}.tif (single file)
|
||||
# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files)
|
||||
# This enables clean, scalable processing in downstream scripts.
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR)
|
||||
# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif
|
||||
# - One TIFF per field per date (1185 fields × N dates in Angata)
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
|
||||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
|
||||
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
|
||||
#
|
||||
# COMMAND #1 - Default (All dates, current date, 7-day window):
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||
#
|
||||
# Example:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||
#
|
||||
# COMMAND #2 - Specific Date Range:
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
|
||||
#
|
||||
# Example (one week back from 2026-02-09):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||||
#
|
||||
# Example (two weeks back from 2026-02-09):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
|
||||
# Storage location: laravel_app/storage/app/angata/field_tiles/
|
||||
# Script execution time: 5-10 minutes (depends on number of dates)
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 2: R20 - Extract Chlorophyll Index (CI)
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs.
|
||||
# Also extracts CI statistics per sub_field for daily tracking.
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band)
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI)
|
||||
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds
|
||||
#
|
||||
# EXPECTED BEHAVIOR:
|
||||
# If field_tiles_CI/ or daily_vals/ missing files, Script 20 will process them
|
||||
# Script 20 skips files that already exist (to avoid re-processing)
|
||||
# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end
|
||||
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||||
#
|
||||
# COMMAND:
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
|
||||
#
|
||||
# Example:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
|
||||
# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/
|
||||
# Storage location: laravel_app/storage/app/angata/field_tiles_CI/
|
||||
# Script execution time: 10-20 minutes (depends on number of dates+fields)
|
||||
#
|
||||
# NOTES:
|
||||
# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE
|
||||
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
|
||||
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 3: R30 - Interpolate Growth Model
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Smooth CI time series using LOESS interpolation to fill gaps.
|
||||
# Creates continuous growth curves for each field across all measurement dates.
|
||||
# Enables trend analysis, yield prediction, and cumulative growth metrics.
|
||||
#
|
||||
# INPUT:
|
||||
# - Daily CI statistics from Script 20 (field_tiles_CI/ per-field RDS files)
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||
# - (This is the growth model output used by Script 21 and 80)
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# COMMAND:
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT]
|
||||
#
|
||||
# Example:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||
# Contains: Interpolated CI data for all fields (wide format)
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 4: R21 - Convert CI RDS to CSV (Python Format)
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Convert growth model output from R's RDS format to Python-compatible CSV.
|
||||
# Transforms from wide format (fields × dates) to long format (one row per field-date pair).
|
||||
# Prepares data for Python harvest detection models.
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||
# (Output from Script 30)
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# COMMAND:
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT]
|
||||
#
|
||||
# Example:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# File: ci_data_for_python.csv (~5-10 MB)
|
||||
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL)
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Predict which fields are approaching harvest in the next 28 days.
|
||||
# Uses neural network (Model 307) trained on historical harvest dates.
|
||||
# Generates weekly probability scores for operational harvest scheduling.
|
||||
#
|
||||
# REQUIRES:
|
||||
# - harvest.xlsx with field planting/harvest dates
|
||||
# - ci_data_for_python.csv from Script 21
|
||||
# - PyTorch environment (conda pytorch_gpu)
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx
|
||||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||
# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# COMMAND:
|
||||
#
|
||||
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT]
|
||||
#
|
||||
# Example:
|
||||
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||
# Rows: One per field (e.g., 1185 rows for Angata)
|
||||
#
|
||||
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 6: R40 - Create Weekly Mosaic TIFFs
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Aggregate daily per-field CI TIFFs into weekly mosaics.
|
||||
# Handles multiple dates (full week) with maximum CI value per pixel.
|
||||
# Creates 5-band output for reporting and KPI calculations.
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif
|
||||
# (Daily per-field CI TIFFs from Script 20)
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif
|
||||
# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files)
|
||||
#
|
||||
# PARAMETERS:
|
||||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week
|
||||
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
#
|
||||
# COMMAND:
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT]
|
||||
#
|
||||
# Example (one week window):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# Location: laravel_app/storage/app/angata/weekly_mosaic/
|
||||
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
|
||||
# Files created: #fields (e.g., 1185 for Angata)
|
||||
# Storage: ~50-100 MB total for all mosaic TIFFs
|
||||
# Script execution time: 5-10 minutes
|
||||
#
|
||||
# NOTE: Files are named with ISO week number (WW) and year (YYYY)
|
||||
# Week calculation is automatic based on END_DATE
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 7: R80 - Calculate Key Performance Indicators (KPIs)
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Calculate per-field metrics from weekly mosaic TIFFs:
|
||||
# - Field uniformity (CV - Coefficient of Variation)
|
||||
# - Growth trends (4-week and 8-week)
|
||||
# - Area change detection
|
||||
# - TCH forecast
|
||||
# - Spatial clustering (weed/stress detection)
|
||||
# - Generates Excel export for dashboards and reporting
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif
|
||||
# - Field boundaries (pivot.geojson)
|
||||
# - Harvest data (harvest.xlsx)
|
||||
# - Historical stats cache (RDS from previous weeks)
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats)
|
||||
# - 21 columns with field-level KPIs and alerts
|
||||
#
|
||||
# PARAMETERS:
|
||||
# END_DATE: Report date in YYYY-MM-DD format (default: today)
|
||||
# PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
|
||||
# OFFSET: Days to look back for historical comparison (default: 7, for backward compatibility)
|
||||
#
|
||||
# COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||||
#
|
||||
# Example:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||||
#
|
||||
# COMMAND #2 - Specific Date & Project:
|
||||
#
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
|
||||
#
|
||||
# Example (2026-02-09, angata, 7-day lookback):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||
# Rows: One per field (e.g., 1185 for Angata)
|
||||
# Columns: 21 KPI columns (uniformity, trend, alerts, etc.)
|
||||
# Location: laravel_app/storage/app/angata/output/
|
||||
# Script execution time: 10-20 minutes
|
||||
#
|
||||
# EXPECTED COLUMNS:
|
||||
# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac,
|
||||
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
|
||||
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
|
||||
#
|
||||
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
|
||||
# R40 order: [END_DATE] [OFFSET] [PROJECT]
|
||||
# R80 order: [END_DATE] [PROJECT] [OFFSET]
|
||||
# These are NOT the same! Ensure correct order for each script.
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# STEP 8: R90/R91 - Generate Word Report (OPTIONAL)
|
||||
# ==============================================================================
|
||||
#
|
||||
# PURPOSE:
|
||||
# Generate formatted Word report (.docx) with:
|
||||
# - KPI summary tables and charts
|
||||
# - Per-field performance metrics
|
||||
# - Alerts and recommendations
|
||||
# - Interpretation guides
|
||||
#
|
||||
# Client-Specific Reports:
|
||||
# - R90: Agronomic Support (for AURA project)
|
||||
# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA)
|
||||
#
|
||||
# INPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||
# (from Script 80)
|
||||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*.docx
|
||||
# - Formatted Word document (~5-10 MB)
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||||
# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render)
|
||||
#
|
||||
# COMMAND #1 - AGRONOMIC REPORT (AURA project):
|
||||
# From R console or R script:
|
||||
#
|
||||
# rmarkdown::render(
|
||||
# "r_app/90_CI_report_with_kpis_simple.Rmd",
|
||||
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||
# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
|
||||
# output_dir = "laravel_app/storage/app/angata/reports"
|
||||
# )
|
||||
#
|
||||
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
|
||||
# From R console or R script:
|
||||
#
|
||||
# rmarkdown::render(
|
||||
# "r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||
# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||
# output_dir = "laravel_app/storage/app/angata/reports"
|
||||
# )
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
|
||||
# Location: laravel_app/storage/app/{PROJECT}/reports/
|
||||
# Script execution time: 5-10 minutes
|
||||
#
|
||||
# NOTE:
|
||||
# These are R Markdown files and cannot be run directly via Rscript
|
||||
# Use rmarkdown::render() from an R interactive session or wrapper script
|
||||
# See run_full_pipeline.R for an automated example
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# QUICK REFERENCE: Common Workflows
|
||||
# ==============================================================================
|
||||
#
|
||||
# WORKFLOW A: Weekly Update (Most Common)
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Goal: Process latest week of data through full pipeline
|
||||
#
|
||||
# Parameters:
|
||||
# $PROJECT = "angata"
|
||||
# $END_DATE = "2026-02-04" # Today or latest date available
|
||||
# $OFFSET = 7 # One week back
|
||||
#
|
||||
# Steps:
|
||||
# 1. SKIP Python download (if you already have data)
|
||||
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7
|
||||
# (Argument order: [PROJECT] [END_DATE] [OFFSET])
|
||||
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
||||
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||
# (Argument order: [END_DATE] [OFFSET] [PROJECT])
|
||||
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7
|
||||
# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!)
|
||||
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
|
||||
# OR from R console:
|
||||
# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||
# params=list(data_dir="angata", report_date=as.Date("2026-02-04")),
|
||||
# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||
# output_dir="laravel_app/storage/app/angata/reports")
|
||||
#
|
||||
# Execution time: ~60-90 minutes total
|
||||
#
|
||||
#
|
||||
# WORKFLOW B: Initial Setup (Large Backfill)
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Goal: Process multiple weeks of historical data
|
||||
#
|
||||
# Steps:
|
||||
# 1. Python download (your entire date range)
|
||||
# 2. Run R10 with large offset to process all historical dates:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365
|
||||
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||
# 3. Run R20 with large offset to process all historical dates:
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
|
||||
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||
# 4. Run R30 once (growth model full season)
|
||||
# 5. Run R21 once (CSV export)
|
||||
# 6. Run R40 with specific week windows as needed
|
||||
# 7. Run R80 for each week you want KPIs for
|
||||
|
||||
# 8. For each week, run:
|
||||
# - R40 with different END_DATE values (one per week)
|
||||
# - R80 with different WEEK/YEAR values (one per week)
|
||||
# - R91 optional (one per week report)
|
||||
#
|
||||
# Pro tip: Script R40 with offset=14 covers two weeks at once
|
||||
# Then R40 again with offset=7 for just one week
|
||||
#
|
||||
#
|
||||
# WORKFLOW C: Troubleshooting (Check Intermediate Outputs)
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Goal: Verify outputs before moving to next step
|
||||
#
|
||||
# After R10: Check field_tiles/{FIELD_ID}/ has #dates files
|
||||
# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files
|
||||
# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds
|
||||
# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week
|
||||
# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
# ==============================================================================
|
||||
# TROUBLESHOOTING
|
||||
# ==============================================================================
|
||||
#
|
||||
# ISSUE: R20 not processing all field_tiles files
|
||||
# ────────────────────────────────────────────────
|
||||
# Symptom: field_tiles has 496 files, field_tiles_CI only has 5
|
||||
#
|
||||
# Possible causes:
|
||||
# 1. Source files incomplete or corrupted
|
||||
# 2. Script 20 skips because CI TIFF already exists (even if incomplete)
|
||||
# 3. Partial run from previous attempt
|
||||
#
|
||||
# Solutions:
|
||||
# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!)
|
||||
# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/*
|
||||
# 2. Re-run Script 20
|
||||
# 3. If still fails, delete field_tiles_CI completely and re-run Script 20
|
||||
# rm -r laravel_app/storage/app/angata/field_tiles_CI/
|
||||
#
|
||||
# ISSUE: Script 80 says "No per-field mosaic files found"
|
||||
# ────────────────────────────────────────────────────────
|
||||
# Symptom: R80 fails to calculate KPIs
|
||||
#
|
||||
# Possible causes:
|
||||
# 1. Script 40 hasn't run yet (weekly_mosaic doesn't exist)
|
||||
# 2. Wrong END_DATE or WEEK/YEAR combination
|
||||
# 3. weekly_mosaic/{FIELD}/ directory missing (old format?)
|
||||
#
|
||||
# Solutions:
|
||||
# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif
|
||||
# 2. Verify END_DATE is within date range of available CI data
|
||||
# 3. For current week: End date must be THIS week (same ISO week as today)
|
||||
#
|
||||
# ISSUE: Python download fails ("Not authorized")
|
||||
# ────────────────────────────────────────────────
|
||||
# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error
|
||||
#
|
||||
# Cause: PLANET_API_KEY environment variable not set
|
||||
#
|
||||
# Solution:
|
||||
# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here"
|
||||
# 2. Verify: $env:PLANET_API_KEY (should show your key)
|
||||
# 3. Try download again
|
||||
#
|
||||
# ISSUE: R30 takes too long
|
||||
# ──────────────────────────
|
||||
# Symptom: Script 30 running for >30 minutes
|
||||
#
|
||||
# Cause: LOESS interpolation is slow with many dates/fields
|
||||
#
|
||||
# Solution:
|
||||
# 1. This is normal - large date ranges slow down interpolation
|
||||
# 2. Subsequent runs are faster (cached results)
|
||||
# 3. If needed: reduce offset or run fewer weeks at a time
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
# ==============================================================================
|
||||
# SUMMARY OF FILES CREATED BY EACH SCRIPT
|
||||
# ==============================================================================
|
||||
#
|
||||
# Script 10 creates:
|
||||
# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif
|
||||
#
|
||||
# Script 20 creates:
|
||||
# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif
|
||||
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds
|
||||
#
|
||||
# Script 30 creates:
|
||||
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||
#
|
||||
# Script 21 creates:
|
||||
# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||
#
|
||||
# Python 31 creates:
|
||||
# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||
#
|
||||
# Script 40 creates:
|
||||
#    laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/week_{WW}_{YYYY}.tif
|
||||
#
|
||||
# Script 80 creates:
|
||||
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds
|
||||
#
|
||||
# Script 90/91 creates:
|
||||
#    laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*_{PROJECT}_{DATE}.docx
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
54
r_app/_SCRIPT_HEADER_TEMPLATE.R
Normal file
54
r_app/_SCRIPT_HEADER_TEMPLATE.R
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# ============================================================================
|
||||
# SCRIPT XX: [DESCRIPTIVE TITLE]
|
||||
# ============================================================================
|
||||
# PURPOSE:
|
||||
# [What this script does in 2-3 sentences. Example: Downloads satellite
|
||||
# imagery from Planet API, processes 4-band RGB+NIR data, and saves
|
||||
# merged GeoTIFFs for use by downstream analysis stages.]
|
||||
#
|
||||
# INPUT DATA:
|
||||
# - Source: [Which directory/files it reads. Example: Planet API, hardcoded bbox]
|
||||
# - Format: [TIFF, RDS, GeoJSON, etc. Example: 4-band uint16 GeoTIFF]
|
||||
# - Location: [Full path example]
|
||||
#
|
||||
# OUTPUT DATA:
|
||||
# - Destination: [Which directory/files it creates. Example: laravel_app/storage/app/{project}/merged_tif/]
|
||||
# - Format: [TIFF, RDS, CSV, etc. Example: Single-band float32 GeoTIFF]
|
||||
# - Naming convention: [Example: {YYYY-MM-DD}.tif]
|
||||
#
|
||||
# USAGE:
|
||||
# Rscript XX_script_name.R [arg1] [arg2] [arg3]
|
||||
#
|
||||
# Example (Windows PowerShell):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/XX_script_name.R angata 2026-01-15
|
||||
#
|
||||
# PARAMETERS:
|
||||
# - arg1: [Description, type, and valid values. Example: project (character) - angata, chemba, xinavane, esa, simba]
|
||||
# - arg2: [Description, type, and valid values. Example: date (character, optional) - ISO format YYYY-MM-DD; default today]
|
||||
#
|
||||
# CLIENT TYPES:
|
||||
# - cane_supply (ANGATA): [Yes/No - if Yes, briefly explain any differences]
|
||||
# - agronomic_support (AURA): [Yes/No - if Yes, briefly explain any differences]
|
||||
#
|
||||
# DEPENDENCIES:
|
||||
# - Packages: [dplyr, tidyr, terra, sf, readr, writexl, etc.]
|
||||
# - Utils files: [parameters_project.R, 00_common_utils.R, XX_utils.R, etc.]
|
||||
# - External data: [harvest.xlsx, pivot.geojson, etc.]
|
||||
# - Data directories: [laravel_app/storage/app/{project}/]
|
||||
#
|
||||
# NOTES:
|
||||
# [Any special considerations, assumptions, or future improvements.
|
||||
# Example: Cloud filtering uses CI >= 0.5 threshold. Multi-field support
|
||||
# implemented via field geometry masking from pivot.geojson.]
|
||||
#
|
||||
# RELATED ISSUES:
|
||||
# SC-XXX: [Brief description of related work]
|
||||
#
|
||||
# HISTORY:
|
||||
# 2026-02-03: [Description of change, if refactored or enhanced]
|
||||
# ============================================================================
|
||||
|
||||
# NOTE: This is a TEMPLATE file for documentation purposes only.
|
||||
# When creating a new script or updating an existing one, copy this template
|
||||
# and fill in all sections with accurate information about your specific script.
|
||||
# Then delete this comment block.
|
||||
|
|
@ -156,9 +156,10 @@ calculate_ci <- function(raster_obj) {
|
|||
red_band <- raster_obj[[3]]
|
||||
nir_band <- raster_obj[[4]]
|
||||
|
||||
# CI formula: (NIR / Red) - 1
|
||||
# This highlights chlorophyll content in vegetation
|
||||
ci_raster <- (nir_band / red_band) - 1
|
||||
# CI formula: (NIR / Green) - 1, NOT (NIR / Red) - 1
|
||||
# *** CRITICAL: Use GREEN band for Chlorophyll Index, NOT RED ***
|
||||
# GREEN band is essential for proper chlorophyll-sensitive calculation
|
||||
ci_raster <- (nir_band / green_band) - 1
|
||||
|
||||
# Filter extreme values that may result from division operations
|
||||
ci_raster[ci_raster > 10] <- 10 # Cap max value
|
||||
|
|
|
|||
|
|
@ -68,8 +68,9 @@ calculate_enhanced_indices <- function(red, green, blue, nir) {
|
|||
grvi <- green / red
|
||||
names(grvi) <- "GRVI"
|
||||
|
||||
# 6. Chlorophyll Index (current CI - for comparison)
|
||||
ci <- nir / red - 1
|
||||
# 6. Chlorophyll Index (CI = NIR / Green - 1, NOT NIR/Red)
|
||||
# *** CRITICAL: Correct formula uses GREEN band, not RED ***
|
||||
ci <- nir / green - 1
|
||||
names(ci) <- "CI"
|
||||
|
||||
return(list(
|
||||
|
|
|
|||
|
|
@ -69,31 +69,94 @@ calculate_week_numbers <- function(report_date = Sys.Date()) {
|
|||
#' @param year Year
|
||||
#' @param mosaic_dir Directory containing weekly mosaics
|
||||
#' @return Terra raster with CI band, or NULL if file not found
|
||||
# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures)
|
||||
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
|
||||
# Check if this is per-field loading mode
|
||||
is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field")
|
||||
|
||||
if (is_per_field) {
|
||||
# Per-field architecture: load this specific field's mosaic
|
||||
per_field_dir <- attr(ci_raster_or_obj, "per_field_dir")
|
||||
week_file <- attr(ci_raster_or_obj, "week_file")
|
||||
field_mosaic_path <- file.path(per_field_dir, field_name, week_file)
|
||||
|
||||
if (file.exists(field_mosaic_path)) {
|
||||
tryCatch({
|
||||
field_mosaic <- terra::rast(field_mosaic_path)
|
||||
# Extract CI band (5th band) if multi-band, otherwise use as-is
|
||||
if (terra::nlyr(field_mosaic) >= 5) {
|
||||
return(field_mosaic[[5]])
|
||||
} else {
|
||||
return(field_mosaic[[1]])
|
||||
}
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING")
|
||||
return(NULL)
|
||||
})
|
||||
} else {
|
||||
safe_log(paste("Per-field mosaic not found for", field_name), "WARNING")
|
||||
return(NULL)
|
||||
}
|
||||
} else {
|
||||
# Single-file architecture: crop from loaded raster
|
||||
if (!is.null(field_vect)) {
|
||||
return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
|
||||
} else {
|
||||
return(ci_raster_or_obj)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
|
||||
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
|
||||
week_path <- file.path(mosaic_dir, week_file)
|
||||
|
||||
if (!file.exists(week_path)) {
|
||||
safe_log(paste("Weekly mosaic not found:", week_path), "WARNING")
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
# FIRST: Try to load single-file mosaic (legacy approach)
|
||||
if (file.exists(week_path)) {
|
||||
tryCatch({
|
||||
mosaic_raster <- terra::rast(week_path)
|
||||
# Extract CI band by name or position
|
||||
if ("CI" %in% names(mosaic_raster)) {
|
||||
ci_raster <- mosaic_raster[["CI"]]
|
||||
} else {
|
||||
# Fallback: assume last band is CI (after Red, Green, Blue, NIR)
|
||||
ci_raster <- mosaic_raster[[nlyr(mosaic_raster)]]
|
||||
}
|
||||
ci_raster <- mosaic_raster[[5]] # CI is the 5th band
|
||||
names(ci_raster) <- "CI"
|
||||
safe_log(paste("Loaded weekly mosaic:", week_file))
|
||||
safe_log(paste("Loaded weekly mosaic (single-file):", week_file))
|
||||
return(ci_raster)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error loading mosaic:", e$message), "ERROR")
|
||||
return(NULL)
|
||||
})
|
||||
}
|
||||
|
||||
# SECOND: Per-field architecture - store mosaic_dir path for later per-field loading
|
||||
# Don't try to merge - just return the directory path so field-level functions can load per-field
|
||||
if (dir.exists(mosaic_dir)) {
|
||||
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
|
||||
field_dirs <- field_dirs[field_dirs != ""]
|
||||
|
||||
# Check if any field has this week's mosaic
|
||||
found_any <- FALSE
|
||||
for (field in field_dirs) {
|
||||
field_mosaic_path <- file.path(mosaic_dir, field, week_file)
|
||||
if (file.exists(field_mosaic_path)) {
|
||||
found_any <- TRUE
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if (found_any) {
|
||||
safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year,
|
||||
"- will load per-field on demand"))
|
||||
# Return a special object that indicates per-field loading is needed
|
||||
# Store the mosaic_dir path in the raster's metadata
|
||||
dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA)
|
||||
attr(dummy_raster, "per_field_dir") <- mosaic_dir
|
||||
attr(dummy_raster, "week_file") <- week_file
|
||||
attr(dummy_raster, "is_per_field") <- TRUE
|
||||
return(dummy_raster)
|
||||
}
|
||||
}
|
||||
|
||||
# If we get here, no mosaic found
|
||||
safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING")
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
# Function to prepare predictions with consistent naming and formatting
|
||||
|
|
@ -141,12 +204,16 @@ calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) {
|
|||
# Extract field boundary
|
||||
field_vect <- field_boundaries_vect[i]
|
||||
|
||||
# crop ci_raster with field_vect and use that for ci_values
|
||||
cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE)
|
||||
# Load appropriate CI raster using helper function
|
||||
cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect)
|
||||
|
||||
# Extract CI values for this field using helper function
|
||||
if (!is.null(cropped_raster)) {
|
||||
field_values <- extract_ci_values(cropped_raster, field_vect)
|
||||
valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
|
||||
} else {
|
||||
valid_values <- c()
|
||||
}
|
||||
|
||||
# If all valid values are 0 (cloud), fill with NA row
|
||||
if (length(valid_values) == 0 || all(valid_values == 0)) {
|
||||
|
|
@ -286,9 +353,18 @@ calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries)
|
|||
# Extract field boundary
|
||||
field_vect <- field_boundaries_vect[i]
|
||||
|
||||
# Extract CI values for both weeks (using helper to get CI band only)
|
||||
current_values <- extract_ci_values(current_ci, field_vect)
|
||||
previous_values <- extract_ci_values(previous_ci, field_vect)
|
||||
# Load appropriate CI rasters using helper function
|
||||
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
|
||||
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
|
||||
|
||||
# Extract CI values for both weeks
|
||||
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
|
||||
current_values <- extract_ci_values(current_field_ci, field_vect)
|
||||
previous_values <- extract_ci_values(previous_field_ci, field_vect)
|
||||
} else {
|
||||
current_values <- c()
|
||||
previous_values <- c()
|
||||
}
|
||||
|
||||
# Clean values
|
||||
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
||||
|
|
@ -569,8 +645,18 @@ calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundari
|
|||
sub_field_name <- field_boundaries$sub_field[i]
|
||||
field_vect <- field_boundaries_vect[i]
|
||||
|
||||
# Extract CI values for both weeks (using helper to get CI band only)
|
||||
current_values <- extract_ci_values(current_ci, field_vect)
|
||||
# Load appropriate CI rasters using helper function
|
||||
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
|
||||
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
|
||||
|
||||
# Extract CI values for both weeks
|
||||
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
|
||||
current_values <- extract_ci_values(current_field_ci, field_vect)
|
||||
previous_values <- extract_ci_values(previous_field_ci, field_vect)
|
||||
} else {
|
||||
current_values <- c()
|
||||
previous_values <- c()
|
||||
}
|
||||
previous_values <- extract_ci_values(previous_ci, field_vect)
|
||||
|
||||
# Clean values
|
||||
|
|
@ -728,8 +814,17 @@ calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundarie
|
|||
}
|
||||
|
||||
# Extract CI values for both weeks (using helper to get CI band only)
|
||||
current_values <- extract_ci_values(current_ci, field_vect)
|
||||
previous_values <- extract_ci_values(previous_ci, field_vect)
|
||||
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
|
||||
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
|
||||
|
||||
# Extract CI values for both weeks
|
||||
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
|
||||
current_values <- extract_ci_values(current_field_ci, field_vect)
|
||||
previous_values <- extract_ci_values(previous_field_ci, field_vect)
|
||||
} else {
|
||||
current_values <- c()
|
||||
previous_values <- c()
|
||||
}
|
||||
|
||||
# Clean values
|
||||
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
||||
|
|
@ -810,8 +905,15 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
|||
sub_field_name <- field_boundaries$sub_field[i]
|
||||
field_vect <- field_boundaries_vect[i]
|
||||
|
||||
# Load appropriate CI raster using helper function
|
||||
field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect)
|
||||
|
||||
# Extract CI values using helper function
|
||||
ci_values <- extract_ci_values(ci_raster, field_vect)
|
||||
if (!is.null(field_ci)) {
|
||||
ci_values <- extract_ci_values(field_ci, field_vect)
|
||||
} else {
|
||||
ci_values <- c()
|
||||
}
|
||||
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
|
||||
|
||||
if (length(valid_values) > 1) {
|
||||
File diff suppressed because it is too large
Load diff
1240
r_app/parameters_project_OLD.R
Normal file
1240
r_app/parameters_project_OLD.R
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -4,24 +4,6 @@
|
|||
# These functions support the creation of maps, charts and report elements
|
||||
# for the CI_report_dashboard_planet.Rmd document.
|
||||
|
||||
#' Safe logging function that works whether log_message exists or not
|
||||
#'
|
||||
#' @param message The message to log
|
||||
#' @param level The log level (default: "INFO")
|
||||
#' @return NULL (used for side effects)
|
||||
#'
|
||||
safe_log <- function(message, level = "INFO") {
|
||||
if (exists("log_message")) {
|
||||
log_message(message, level)
|
||||
} else {
|
||||
if (level %in% c("ERROR", "WARNING")) {
|
||||
warning(message)
|
||||
} else {
|
||||
message(message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#' Creates a sub-chunk for use within RMarkdown documents
|
||||
#'
|
||||
#' @param g A ggplot object to render in the sub-chunk
|
||||
|
|
@ -244,14 +226,14 @@ ci_plot <- function(pivotName,
|
|||
# Filter for the specific pivot
|
||||
AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName)
|
||||
|
||||
# Create crop masks for different timepoints using terra functions
|
||||
singlePivot <- terra::crop(current_ci, pivotShape) %>% terra::mask(., pivotShape)
|
||||
singlePivot_m1 <- terra::crop(ci_minus_1, pivotShape) %>% terra::mask(., pivotShape)
|
||||
singlePivot_m2 <- terra::crop(ci_minus_2, pivotShape) %>% terra::mask(., pivotShape)
|
||||
# Per-field mosaics are already cropped to field boundaries, so use directly without cropping
|
||||
singlePivot <- current_ci
|
||||
singlePivot_m1 <- ci_minus_1
|
||||
singlePivot_m2 <- ci_minus_2
|
||||
|
||||
# Create difference maps
|
||||
abs_CI_last_week <- terra::crop(last_week_diff, pivotShape) %>% terra::mask(., pivotShape)
|
||||
abs_CI_three_week <- terra::crop(three_week_diff, pivotShape) %>% terra::mask(., pivotShape)
|
||||
# Use difference maps directly (already field-specific)
|
||||
abs_CI_last_week <- last_week_diff
|
||||
abs_CI_three_week <- three_week_diff
|
||||
|
||||
# Get planting date
|
||||
planting_date <- harvesting_data %>%
|
||||
|
|
@ -822,3 +804,144 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
|||
})
|
||||
}
|
||||
|
||||
#' Aggregate per-field weekly mosaics into a farm-level mosaic
#'
#' Reads all per-field mosaic TIFs for a given week and merges them into a single farm-level mosaic.
#' This is used for overview maps in the report (Script 90).
#'
#' Per-field mosaics already have proper geospatial metadata (CRS, geotransform) from Script 10,
#' so terra can align them automatically without needing field boundaries or extent information.
#'
#' @param weekly_mosaic_dir Path to weekly_mosaic directory (e.g., "laravel_app/storage/app/{project}/weekly_mosaic")
#' @param target_week ISO week number (e.g., 52)
#' @param target_year ISO year (e.g., 2025)
#' @return SpatRaster object (5-band: R,G,B,NIR,CI) or NULL if no fields found
#'
#' @details
#' Per-field mosaics are located at: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
#' This function:
#' 1. Finds all per-field subdirectories
#' 2. Loads each field's weekly mosaic
#' 3. Merges to a single raster using terra::mosaic() (alignment handled automatically by metadata)
#' 4. Returns combined 5-band raster for visualization
#'
#' All failure paths return NULL (never raise), logging via safe_log() instead.
#'
aggregate_per_field_mosaics_to_farm_level <- function(
  weekly_mosaic_dir,
  target_week,
  target_year
) {

  tryCatch({

    # Validate directory exists
    if (!dir.exists(weekly_mosaic_dir)) {
      safe_log(paste("Weekly mosaic directory not found:", weekly_mosaic_dir), "WARNING")
      return(NULL)
    }

    # Find all per-field subdirectories (non-TIF files at top level).
    # Both conditions are applied: the name must not look like a TIF, and the
    # entry must actually be a directory on disk.
    all_items <- list.files(weekly_mosaic_dir, full.names = FALSE)
    field_dirs <- all_items[
      !grepl("\\.tif$", all_items, ignore.case = TRUE) &
      dir.exists(file.path(weekly_mosaic_dir, all_items))
    ]

    if (length(field_dirs) == 0) {
      safe_log(paste("No per-field directories found in", weekly_mosaic_dir), "WARNING")
      return(NULL)
    }

    safe_log(paste("Found", length(field_dirs), "field directories. Aggregating week",
             sprintf("%02d", target_week), target_year), "INFO")

    # Collect rasters from each field, keyed by field directory name
    raster_list <- list()

    for (field_dir in field_dirs) {
      # Expected layout: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
      field_mosaic_path <- file.path(
        weekly_mosaic_dir,
        field_dir,
        paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif")
      )

      # NOTE(review): fields whose weekly TIF is absent are skipped silently
      # (no WARNING is logged for the missing-file case) — confirm intended.
      if (file.exists(field_mosaic_path)) {
        # A corrupt/unreadable TIF only drops that one field, not the whole run
        tryCatch({
          r <- terra::rast(field_mosaic_path)
          raster_list[[field_dir]] <- r
          safe_log(paste("Loaded mosaic for field:", field_dir), "DEBUG")
        }, error = function(e) {
          safe_log(paste("Could not load mosaic for field", field_dir, ":", e$message), "WARNING")
        })
      }
    }

    if (length(raster_list) == 0) {
      safe_log(paste("No field mosaics found for week", sprintf("%02d", target_week), target_year), "WARNING")
      return(NULL)
    }

    safe_log(paste("Successfully loaded mosaics for", length(raster_list), "fields"), "INFO")

    # Create a SpatRasterCollection and mosaic using correct terra syntax.
    # The inner tryCatch converts any terra failure into a NULL return so the
    # caller (report rendering) degrades gracefully.
    tryCatch({
      rsrc <- terra::sprc(raster_list)
      safe_log(paste("Created SpatRasterCollection with", length(raster_list), "rasters"), "DEBUG")

      # Mosaic the rasters - this merges them into a single continuous raster
      farm_mosaic <- terra::mosaic(rsrc)

      safe_log(paste("Aggregated", length(raster_list), "per-field mosaics into farm-level mosaic"), "INFO")

      # Verify mosaic was created successfully (defensive; converted to the
      # ERROR log + NULL return by the handler below)
      if (is.null(farm_mosaic)) {
        stop("mosaic() returned NULL")
      }

      return(farm_mosaic)

    }, error = function(e) {
      safe_log(paste("Error during mosaic creation:", e$message), "ERROR")
      return(NULL)
    })

  }, error = function(e) {
    # Catch-all for any unexpected failure (I/O, bad arguments, ...)
    safe_log(paste("Error aggregating per-field mosaics:", e$message), "ERROR")
    return(NULL)
  })
}
|
||||
|
||||
|
||||
#' Get per-field mosaic path (new per-field architecture)
#'
#' Returns the path to a per-field weekly mosaic for direct visualization.
#' Replaces the old cropping workflow: now we load the field's own mosaic
#' instead of cropping the farm-level raster.
#'
#' @param weekly_mosaic_dir Path to weekly_mosaic directory
#' @param field_name Name of the field
#' @param target_week ISO week number
#' @param target_year ISO year
#' @return Path to field-specific mosaic TIF, or NULL if not found
#'
get_per_field_mosaic_path <- function(
  weekly_mosaic_dir,
  field_name,
  target_week,
  target_year
) {
  # Expected layout: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
  mosaic_filename <- paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif")
  candidate <- file.path(weekly_mosaic_dir, field_name, mosaic_filename)

  # Missing mosaic is a soft failure: log and let the caller handle NULL
  if (!file.exists(candidate)) {
    safe_log(paste("Per-field mosaic not found for field", field_name,
             "week", sprintf("%02d", target_week), target_year), "WARNING")
    return(NULL)
  }

  candidate
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,11 +5,12 @@
|
|||
# 1. Python: Download Planet images
|
||||
# 2. R 10: Create master grid and split TIFFs
|
||||
# 3. R 20: CI Extraction
|
||||
# 4. R 21: Convert CI RDS to CSV
|
||||
# 5. R 30: Interpolate growth model
|
||||
# 4. R 30: Interpolate growth model
|
||||
# 5. R 21: Convert CI RDS to CSV (uses Script 30 output)
|
||||
# 6. Python 31: Harvest imminent weekly
|
||||
# 7. R 40: Mosaic creation
|
||||
# 8. R 80: Calculate KPIs
|
||||
# 9. R 90 (Agronomic) OR R 91 (Cane Supply): Generate Word Report
|
||||
#
|
||||
# ==============================================================================
|
||||
# HOW TO RUN THIS SCRIPT
|
||||
|
|
@ -30,11 +31,12 @@
|
|||
# ==============================================================================
|
||||
|
||||
# *** EDIT THESE VARIABLES ***
|
||||
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
|
||||
end_date <- as.Date("2026-02-04") # or specify: as.Date("2026-01-27") , Sys.Date()
|
||||
offset <- 7 # days to look back
|
||||
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
|
||||
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
|
||||
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
|
||||
migrate_legacy_format <- TRUE # Set to TRUE to migrate from old format (merged_tif/merged_tif_8b) to new format (field_tiles)
|
||||
# *** NOTE: data_source is now unified - all projects use field_tiles after migration ***
|
||||
# ***************************
|
||||
|
||||
# Format dates
|
||||
|
|
@ -48,23 +50,35 @@ pipeline_success <- TRUE
|
|||
# ==============================================================================
|
||||
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")
|
||||
|
||||
# Check Script 10 outputs (tiled splits)
|
||||
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
|
||||
tiles_dates <- if (dir.exists(tiles_dir)) {
|
||||
list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
|
||||
} else {
|
||||
c()
|
||||
# Check Script 10 outputs (field_tiles with per-field TIFFs)
|
||||
# Script 10 outputs to field_tiles/{field_id}/{date}.tif
|
||||
field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
|
||||
tiles_dates <- c()
|
||||
if (dir.exists(field_tiles_dir)) {
|
||||
# Get all field subdirectories
|
||||
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs) > 0) {
|
||||
# Get unique dates from all field directories
|
||||
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
tiles_dates <- unique(sub("\\.tif$", "", all_files))
|
||||
}
|
||||
}
|
||||
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
|
||||
cat(sprintf("Script 10: %d dates already tiled (field_tiles/)\n", length(tiles_dates)))
|
||||
|
||||
# Check Script 20 outputs (CI extraction) - daily RDS files
|
||||
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
|
||||
ci_files <- if (dir.exists(ci_daily_dir)) {
|
||||
list.files(ci_daily_dir, pattern = "\\.rds$")
|
||||
} else {
|
||||
c()
|
||||
# Check Script 20 outputs (CI extraction) - per-field CI TIFFs at field_tiles_CI/{FIELD}/{DATE}.tif
|
||||
# NOTE: This is the NEW per-field format, not the old extracted_ci/ flat format
|
||||
field_tiles_ci_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
|
||||
ci_tiff_dates <- c()
|
||||
if (dir.exists(field_tiles_ci_dir)) {
|
||||
# Get all field subdirectories
|
||||
field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs) > 0) {
|
||||
# Get unique dates from all field directories (dates that have been processed through Script 20)
|
||||
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
ci_tiff_dates <- unique(sub("\\.tif$", "", all_files))
|
||||
}
|
||||
}
|
||||
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
|
||||
cat(sprintf("Script 20: %d dates already processed (field_tiles_CI/)\n", length(ci_tiff_dates)))
|
||||
|
||||
# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this
|
||||
# Instead, check if CI RDS files exist - if they do, 21 should also run
|
||||
|
|
@ -72,7 +86,7 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
|
|||
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
|
||||
|
||||
# Check Script 40 outputs (mosaics in weekly_tile_max/5x5)
|
||||
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
|
||||
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
|
||||
mosaic_files <- if (dir.exists(mosaic_dir)) {
|
||||
list.files(mosaic_dir, pattern = "\\.tif$")
|
||||
} else {
|
||||
|
|
@ -90,9 +104,9 @@ kpi_files <- if (dir.exists(kpi_dir)) {
|
|||
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
|
||||
|
||||
# Determine if scripts should run based on outputs
|
||||
skip_10 <- length(tiles_dates) > 0 && !force_rerun
|
||||
skip_20 <- length(ci_files) > 0 && !force_rerun
|
||||
skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped
|
||||
skip_10 <- FALSE # Script 10 should always run to pick up any new merged_tif files
|
||||
skip_20 <- FALSE # Script 20 always runs to process dates in the current window (per-field format)
|
||||
skip_21 <- FALSE # Always run Script 21 - it regenerates the CSV whenever Script 20 has run
|
||||
skip_40 <- length(mosaic_files) > 0 && !force_rerun
|
||||
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
|
||||
|
||||
|
|
@ -110,25 +124,50 @@ cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
|
|||
tryCatch({
|
||||
# Setup paths
|
||||
base_path <- file.path("laravel_app", "storage", "app", project_dir)
|
||||
merged_tifs_dir <- file.path(base_path, data_source)
|
||||
|
||||
# Get existing dates from raw TIFFs
|
||||
# Always check merged_tif/ for existing downloads (both modes)
|
||||
# merged_tif/ is where Python downloads go, before Script 10 splits to field_tiles/
|
||||
merged_tifs_dir <- file.path(base_path, "merged_tif")
|
||||
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
|
||||
|
||||
# Get existing dates from tiles (better indicator of completion)
|
||||
existing_tile_dates <- tiles_dates
|
||||
if (migrate_legacy_format) {
|
||||
cat(sprintf(" Migration mode: Checking merged_tif/ for existing dates\n"))
|
||||
} else {
|
||||
cat(sprintf(" Production mode: Checking merged_tif/ and field_tiles/ for existing dates\n"))
|
||||
}
|
||||
|
||||
# Find missing dates in the window
|
||||
# Window: from (end_date - offset) to end_date
|
||||
# Example: if end_date=2026-02-04 and offset=7, window is 2026-01-28 to 2026-02-04 (8 dates)
|
||||
start_date <- end_date - offset
|
||||
date_seq <- seq(start_date, end_date, by = "day")
|
||||
target_dates <- format(date_seq, "%Y-%m-%d")
|
||||
|
||||
# Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
|
||||
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
|
||||
# Also check field_tiles/ for dates that have already been processed through Script 10
|
||||
# field_tiles/ contains {field_id}/{date}.tif files - check which dates are present
|
||||
field_tiles_dir <- file.path(base_path, "field_tiles")
|
||||
processed_dates <- c()
|
||||
if (dir.exists(field_tiles_dir)) {
|
||||
# Get all field subdirectories
|
||||
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs) > 0) {
|
||||
# Get unique dates from all field directories
|
||||
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
processed_dates <- unique(sub("\\.tif$", "", all_files))
|
||||
}
|
||||
}
|
||||
|
||||
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
|
||||
cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
|
||||
# Combine existing dates from both merged_tif and field_tiles
|
||||
all_existing_dates <- unique(c(existing_tiff_dates, processed_dates))
|
||||
|
||||
# Compare: which target dates don't exist in merged_tif/ or field_tiles/?
|
||||
missing_dates <- target_dates[!(target_dates %in% all_existing_dates)]
|
||||
|
||||
cat(sprintf(" Existing dates in merged_tif/: %d\n", length(existing_tiff_dates)))
|
||||
cat(sprintf(" Processed dates in field_tiles/: %d\n", length(processed_dates)))
|
||||
cat(sprintf(" Target window: %s to %s (%d dates)\n", start_date, end_date, length(target_dates)))
|
||||
cat(sprintf(" Missing dates to download: %d\n", length(missing_dates)))
|
||||
|
||||
# Download each missing date
|
||||
download_count <- 0
|
||||
|
|
@ -170,6 +209,51 @@ tryCatch({
|
|||
pipeline_success <<- FALSE
|
||||
})
|
||||
|
||||
# ==============================================================================
|
||||
# MIGRATION: Move legacy format files to new format (if enabled)
|
||||
# ==============================================================================
|
||||
if (pipeline_success && migrate_legacy_format) {
|
||||
cat("\n========== MIGRATION: MOVING LEGACY FORMAT FILES ==========\n")
|
||||
tryCatch({
|
||||
base_path <- file.path("laravel_app", "storage", "app", project_dir)
|
||||
|
||||
# PART 1: Move merged_tif files to field_tiles
|
||||
merged_tif_old <- file.path(base_path, "merged_tif")
|
||||
field_tiles_new <- file.path(base_path, "field_tiles")
|
||||
|
||||
if (dir.exists(merged_tif_old)) {
|
||||
tif_files <- list.files(merged_tif_old, pattern = "\\.tif$", full.names = TRUE)
|
||||
if (length(tif_files) > 0) {
|
||||
dir.create(field_tiles_new, showWarnings = FALSE, recursive = TRUE)
|
||||
for (file in tif_files) {
|
||||
file.rename(file, file.path(field_tiles_new, basename(file)))
|
||||
}
|
||||
cat(sprintf("✓ Moved %d TIFF files from merged_tif/ to field_tiles/\n", length(tif_files)))
|
||||
}
|
||||
}
|
||||
|
||||
# PART 2: Move merged_tif_final files (CI) to field_tiles_CI
|
||||
merged_tif_final_old <- file.path(base_path, "merged_tif_final")
|
||||
field_tiles_ci_new <- file.path(base_path, "field_tiles_CI")
|
||||
|
||||
if (dir.exists(merged_tif_final_old)) {
|
||||
ci_files <- list.files(merged_tif_final_old, pattern = "\\.tif$", full.names = TRUE)
|
||||
if (length(ci_files) > 0) {
|
||||
dir.create(field_tiles_ci_new, showWarnings = FALSE, recursive = TRUE)
|
||||
for (file in ci_files) {
|
||||
file.rename(file, file.path(field_tiles_ci_new, basename(file)))
|
||||
}
|
||||
cat(sprintf("✓ Moved %d CI TIFF files from merged_tif_final/ to field_tiles_CI/\n", length(ci_files)))
|
||||
}
|
||||
}
|
||||
|
||||
cat("✓ Migration completed successfully\n")
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in migration:", e$message, "\n")
|
||||
pipeline_success <<- FALSE
|
||||
})
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
|
||||
# ==============================================================================
|
||||
|
|
@ -178,20 +262,38 @@ if (pipeline_success && !skip_10) {
|
|||
tryCatch({
|
||||
# Set environment variables for the script (Script 10 uses these for filtering)
|
||||
assign("PROJECT", project_dir, envir = .GlobalEnv)
|
||||
assign("end_date", end_date, envir = .GlobalEnv)
|
||||
assign("offset", offset, envir = .GlobalEnv)
|
||||
|
||||
# Count field_tiles/ dates BEFORE Script 10 runs
|
||||
field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
|
||||
field_dirs_before <- c()
|
||||
if (dir.exists(field_tiles_dir)) {
|
||||
field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs_tmp) > 0) {
|
||||
all_files_before <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
field_dirs_before <- unique(sub("\\.tif$", "", all_files_before))
|
||||
}
|
||||
}
|
||||
|
||||
# Suppress verbose per-date output, show only summary
|
||||
sink(nullfile())
|
||||
source("r_app/10_create_master_grid_and_split_tiffs.R")
|
||||
source("r_app/10_create_per_field_tiffs.R")
|
||||
sink()
|
||||
|
||||
# Verify output
|
||||
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
|
||||
if (dir.exists(tiles_dir)) {
|
||||
subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
|
||||
cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs)))
|
||||
} else {
|
||||
cat("✓ Script 10 completed\n")
|
||||
# Count field_tiles/ dates AFTER Script 10 runs
|
||||
field_dirs_after <- c()
|
||||
if (dir.exists(field_tiles_dir)) {
|
||||
field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs_tmp) > 0) {
|
||||
all_files_after <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
field_dirs_after <- unique(sub("\\.tif$", "", all_files_after))
|
||||
}
|
||||
}
|
||||
|
||||
# Calculate newly added dates
|
||||
newly_added <- length(field_dirs_after) - length(field_dirs_before)
|
||||
cat(sprintf("✓ Script 10 completed - processed %d new dates (total: %d dates in field_tiles/)\n", max(0, newly_added), length(field_dirs_after)))
|
||||
}, error = function(e) {
|
||||
sink()
|
||||
cat("✗ Error in Script 10:", e$message, "\n")
|
||||
|
|
@ -211,19 +313,71 @@ if (pipeline_success && !skip_20) {
|
|||
assign("end_date", end_date, envir = .GlobalEnv)
|
||||
assign("offset", offset, envir = .GlobalEnv)
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
source("r_app/20_ci_extraction.R")
|
||||
main() # Call main() to execute the script with the environment variables
|
||||
# If in migration mode, find all dates that need processing
|
||||
if (migrate_legacy_format) {
|
||||
cat("Migration mode: Finding all dates in field_tiles/ that need CI processing...\n")
|
||||
|
||||
# Verify CI output was created
|
||||
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
|
||||
if (dir.exists(ci_daily_dir)) {
|
||||
files <- list.files(ci_daily_dir, pattern = "\\.rds$")
|
||||
cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files)))
|
||||
} else {
|
||||
cat("✓ Script 20 completed\n")
|
||||
# Get all dates from field_tiles/
|
||||
field_tiles_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
|
||||
all_dates_in_tiles <- c()
|
||||
if (dir.exists(field_tiles_dir_check)) {
|
||||
field_dirs_tmp <- list.dirs(field_tiles_dir_check, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs_tmp) > 0) {
|
||||
all_files_tmp <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
all_dates_in_tiles <- unique(sub("\\.tif$", "", all_files_tmp))
|
||||
}
|
||||
}
|
||||
|
||||
# Get dates already processed in field_tiles_CI/
|
||||
field_tiles_ci_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
|
||||
processed_ci_dates <- c()
|
||||
if (dir.exists(field_tiles_ci_check)) {
|
||||
field_dirs_ci <- list.dirs(field_tiles_ci_check, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs_ci) > 0) {
|
||||
all_files_ci <- list.files(field_dirs_ci, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
processed_ci_dates <- unique(sub("\\.tif$", "", all_files_ci))
|
||||
}
|
||||
}
|
||||
|
||||
# Get dates already in old RDS format
|
||||
old_rds_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
|
||||
processed_rds_dates <- c()
|
||||
if (dir.exists(old_rds_dir)) {
|
||||
rds_files <- list.files(old_rds_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$")
|
||||
processed_rds_dates <- unique(sub("\\.rds$", "", rds_files))
|
||||
}
|
||||
|
||||
# Find dates missing from either location
|
||||
dates_missing_ci <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_ci_dates)]
|
||||
dates_missing_rds <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_rds_dates)]
|
||||
dates_to_process_migration <- sort(unique(c(dates_missing_ci, dates_missing_rds)))
|
||||
|
||||
cat(sprintf(" All dates in field_tiles/: %d\n", length(all_dates_in_tiles)))
|
||||
cat(sprintf(" Already in field_tiles_CI/: %d\n", length(processed_ci_dates)))
|
||||
cat(sprintf(" Already in extracted_ci/daily_vals/: %d\n", length(processed_rds_dates)))
|
||||
cat(sprintf(" Dates needing processing: %d\n", length(dates_to_process_migration)))
|
||||
|
||||
if (length(dates_to_process_migration) > 0) {
|
||||
assign("dates_to_process", dates_to_process_migration, envir = .GlobalEnv)
|
||||
cat(sprintf(" Will process: %s to %s\n", dates_to_process_migration[1], dates_to_process_migration[length(dates_to_process_migration)]))
|
||||
}
|
||||
}
|
||||
|
||||
source("r_app/20_ci_extraction_per_field.R")
|
||||
main()
|
||||
|
||||
# Verify output
|
||||
field_tiles_ci_verify <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
|
||||
tiff_count <- 0
|
||||
if (dir.exists(field_tiles_ci_verify)) {
|
||||
field_dirs_verify <- list.dirs(field_tiles_ci_verify, full.names = TRUE, recursive = FALSE)
|
||||
if (length(field_dirs_verify) > 0) {
|
||||
all_files_verify <- list.files(field_dirs_verify, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||
tiff_count <- length(all_files_verify)
|
||||
}
|
||||
}
|
||||
cat(sprintf("✓ Script 20 completed - %d CI TIFFs in field_tiles_CI/\n", tiff_count))
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in Script 20:", e$message, "\n")
|
||||
pipeline_success <<- FALSE
|
||||
|
|
@ -233,7 +387,35 @@ if (pipeline_success && !skip_20) {
|
|||
}
|
||||
|
||||
# ==============================================================================
|
||||
# SCRIPT 21: CONVERT CI RDS TO CSV
|
||||
# SCRIPT 30: INTERPOLATE GROWTH MODEL
|
||||
# ==============================================================================
|
||||
if (pipeline_success) {
|
||||
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
|
||||
tryCatch({
|
||||
# Set environment variables for the script
|
||||
assign("end_date", end_date, envir = .GlobalEnv)
|
||||
assign("offset", offset, envir = .GlobalEnv)
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
source("r_app/30_interpolate_growth_model.R")
|
||||
main() # Call main() to execute the script with the environment variables
|
||||
|
||||
# Verify interpolated output
|
||||
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
|
||||
if (dir.exists(growth_dir)) {
|
||||
files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
|
||||
cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
|
||||
} else {
|
||||
cat("✓ Script 30 completed\n")
|
||||
}
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in Script 30:", e$message, "\n")
|
||||
pipeline_success <<- FALSE
|
||||
})
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
# SCRIPT 21: CONVERT CI RDS TO CSV (uses Script 30 output)
|
||||
# ==============================================================================
|
||||
if (pipeline_success && !skip_21) {
|
||||
cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
|
||||
|
|
@ -262,35 +444,6 @@ if (pipeline_success && !skip_21) {
|
|||
cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
# SCRIPT 30: INTERPOLATE GROWTH MODEL
|
||||
# ==============================================================================
|
||||
if (pipeline_success) {
|
||||
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
|
||||
tryCatch({
|
||||
# Set environment variables for the script
|
||||
assign("end_date", end_date, envir = .GlobalEnv)
|
||||
assign("offset", offset, envir = .GlobalEnv)
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
source("r_app/30_interpolate_growth_model.R")
|
||||
main() # Call main() to execute the script with the environment variables
|
||||
|
||||
# Verify interpolated output
|
||||
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
|
||||
if (dir.exists(growth_dir)) {
|
||||
files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
|
||||
cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
|
||||
} else {
|
||||
cat("✓ Script 30 completed\n")
|
||||
}
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in Script 30:", e$message, "\n")
|
||||
pipeline_success <<- FALSE
|
||||
})
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
# PYTHON 31: HARVEST IMMINENT WEEKLY
|
||||
# ==============================================================================
|
||||
|
|
@ -335,9 +488,8 @@ if (pipeline_success && !skip_40) {
|
|||
assign("end_date", end_date, envir = .GlobalEnv)
|
||||
assign("offset", offset, envir = .GlobalEnv)
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
source("r_app/40_mosaic_creation.R")
|
||||
source("r_app/40_mosaic_creation_per_field.R")
|
||||
main() # Call main() to execute the script with the environment variables
|
||||
|
||||
# Verify mosaic output
|
||||
|
|
@ -368,7 +520,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
|
|||
assign("end_date_str", end_date_str, envir = .GlobalEnv)
|
||||
assign("offset", offset, envir = .GlobalEnv)
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
source("r_app/80_calculate_kpis.R")
|
||||
main() # Call main() to execute the script with the environment variables
|
||||
|
|
@ -389,6 +540,92 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
|
|||
})
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
# SCRIPT 90/91: GENERATE WORD REPORTS (CLIENT-TYPE SPECIFIC)
|
||||
# ==============================================================================
|
||||
if (pipeline_success) {
|
||||
# Determine client type from project mapping
|
||||
source("r_app/parameters_project.R")
|
||||
source("r_app/00_common_utils.R")
|
||||
client_type <- get_client_type(project_dir)
|
||||
|
||||
if (client_type == "agronomic_support") {
|
||||
# SCRIPT 90: Agronomic Support Report (for Aura)
|
||||
cat("\n========== RUNNING SCRIPT 90: AGRONOMIC SUPPORT REPORT (WORD) ==========\n")
|
||||
tryCatch({
|
||||
# Render the R Markdown file with parameters
|
||||
# The Rmd file will load parameters_project and utilities internally
|
||||
rmarkdown::render(
|
||||
"r_app/90_CI_report_with_kpis_simple.Rmd",
|
||||
params = list(
|
||||
data_dir = project_dir,
|
||||
report_date = end_date,
|
||||
mail_day = "Monday",
|
||||
borders = TRUE,
|
||||
ci_plot_type = "both",
|
||||
colorblind_friendly = FALSE,
|
||||
facet_by_season = FALSE,
|
||||
x_axis_unit = "days"
|
||||
),
|
||||
output_file = sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str),
|
||||
output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"),
|
||||
quiet = FALSE,
|
||||
knit_root_dir = getwd()
|
||||
)
|
||||
|
||||
# Verify report was created
|
||||
report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports",
|
||||
sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str))
|
||||
if (file.exists(report_file)) {
|
||||
cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", basename(report_file)))
|
||||
} else {
|
||||
cat("⚠ Script 90 report file not found - check rendering\n")
|
||||
}
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in Script 90:", e$message, "\n")
|
||||
print(e)
|
||||
pipeline_success <<- FALSE
|
||||
})
|
||||
} else if (client_type == "cane_supply") {
|
||||
# SCRIPT 91: Cane Supply Report (for Angata, Chemba, Xinavane, ESA)
|
||||
cat("\n========== RUNNING SCRIPT 91: CANE SUPPLY REPORT (WORD) ==========\n")
|
||||
tryCatch({
|
||||
# Render the R Markdown file with parameters
|
||||
# The Rmd file will load parameters_project and utilities internally
|
||||
rmarkdown::render(
|
||||
"r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||
params = list(
|
||||
data_dir = project_dir,
|
||||
report_date = end_date,
|
||||
mail_day = "Monday",
|
||||
borders = TRUE,
|
||||
ci_plot_type = "both",
|
||||
colorblind_friendly = FALSE,
|
||||
facet_by_season = FALSE,
|
||||
x_axis_unit = "days"
|
||||
),
|
||||
output_file = sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str),
|
||||
output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"),
|
||||
quiet = FALSE,
|
||||
knit_root_dir = getwd()
|
||||
)
|
||||
|
||||
# Verify report was created
|
||||
report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports",
|
||||
sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str))
|
||||
if (file.exists(report_file)) {
|
||||
cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", basename(report_file)))
|
||||
} else {
|
||||
cat("⚠ Script 91 report file not found - check rendering\n")
|
||||
}
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in Script 91:", e$message, "\n")
|
||||
print(e)
|
||||
pipeline_success <<- FALSE
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
# ==============================================================================
|
||||
# SUMMARY
|
||||
# ==============================================================================
|
||||
|
|
@ -401,4 +638,4 @@ if (pipeline_success) {
|
|||
} else {
|
||||
cat("Status: ✗ Pipeline failed - check errors above\n")
|
||||
}
|
||||
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")
|
||||
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 30 → R 21 → Python 31 → R 40 → R 80 → R 90/91\n")
|
||||
|
|
|
|||
Loading…
Reference in a new issue