Merge branch 'backward-compat-no-tiling' into code-improvements
This commit is contained in:
commit
51d479673d
2
.github/copilot-instructions.md
vendored
2
.github/copilot-instructions.md
vendored
|
|
@ -319,4 +319,4 @@ After each major stage, verify:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`. For related Linear issues (code quality, architecture docs), see SC-59, SC-60, SC-61._
|
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`.
|
||||||
|
|
|
||||||
|
|
@ -22,9 +22,9 @@ Options:
|
||||||
--resolution RES Resolution in meters (default: 3)
|
--resolution RES Resolution in meters (default: 3)
|
||||||
--skip-merge Skip merge step (download only, keep individual tiles)
|
--skip-merge Skip merge step (download only, keep individual tiles)
|
||||||
--cleanup Delete intermediate single_images folder after merge
|
--cleanup Delete intermediate single_images folder after merge
|
||||||
--clear-singles Clear single_images_8b folder before download
|
--clear-singles Clear single_images folder before download
|
||||||
--clear-merged Clear merged_tif_8b and merged_virtual_8b folders before download
|
--clear-merged Clear merged_tif folder before download
|
||||||
--clear-all Clear all output folders (singles, merged, virtual) before download
|
--clear-all Clear all output folders (singles, merged) before download
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
|
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
|
||||||
|
|
@ -89,20 +89,118 @@ def setup_config():
|
||||||
config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44')
|
config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44')
|
||||||
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos')
|
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos')
|
||||||
|
|
||||||
# BYOC collection for Planet 8-band data
|
|
||||||
collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'
|
|
||||||
byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True)
|
|
||||||
|
|
||||||
catalog = SentinelHubCatalog(config=config)
|
catalog = SentinelHubCatalog(config=config)
|
||||||
|
|
||||||
return config, byoc, catalog
|
return config, catalog
|
||||||
|
|
||||||
|
|
||||||
|
def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple:
|
||||||
|
"""
|
||||||
|
Auto-detect which Planet collection is available for this project.
|
||||||
|
|
||||||
|
Checks a week of dates (backwards from date_str) to ensure robust detection.
|
||||||
|
If ANY date has data in the new 8-band collection, use that.
|
||||||
|
If no dates have data in new collection, fall back to legacy 4-band.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
date_str: Reference date (YYYY-MM-DD)
|
||||||
|
bbox_list: List of bounding boxes for testing
|
||||||
|
catalog: SentinelHubCatalog instance
|
||||||
|
date_range_days: Number of days to check backwards (default: 7)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(byoc, collection_info_dict) where byoc is DataCollection and dict contains metadata
|
||||||
|
"""
|
||||||
|
|
||||||
|
new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a' # 8-band (new)
|
||||||
|
old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c' # 4-band (legacy)
|
||||||
|
test_bbox = bbox_list[0]
|
||||||
|
|
||||||
|
# Generate date range (backwards from date_str)
|
||||||
|
try:
|
||||||
|
ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
|
||||||
|
except ValueError:
|
||||||
|
print(f"⚠️ Invalid date format: {date_str}. Using today.")
|
||||||
|
ref_date = datetime.datetime.now()
|
||||||
|
|
||||||
|
date_range = [
|
||||||
|
(ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d')
|
||||||
|
for i in range(date_range_days)
|
||||||
|
]
|
||||||
|
|
||||||
|
print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...")
|
||||||
|
print(f" Test range: {date_range[-1]} to {date_range[0]}")
|
||||||
|
|
||||||
|
# Try new collection first
|
||||||
|
print(f"\n Trying 8-band collection: {new_id}")
|
||||||
|
byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True)
|
||||||
|
|
||||||
|
for test_date in date_range:
|
||||||
|
try:
|
||||||
|
search = catalog.search(
|
||||||
|
collection=byoc_new,
|
||||||
|
bbox=test_bbox,
|
||||||
|
time=(test_date, test_date),
|
||||||
|
filter=None
|
||||||
|
)
|
||||||
|
tiles = list(search)
|
||||||
|
if len(tiles) > 0:
|
||||||
|
print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
|
||||||
|
print(f" ✓ Using 8-band collection")
|
||||||
|
return byoc_new, {
|
||||||
|
'collection_id': new_id,
|
||||||
|
'name': 'planet_data_8b',
|
||||||
|
'bands': 4,
|
||||||
|
'output_folder': 'merged_tif',
|
||||||
|
'singles_folder': 'single_images'
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ⚠️ {test_date}: {str(e)[:60]}")
|
||||||
|
|
||||||
|
# No data in new collection, try legacy
|
||||||
|
print(f"\n ✗ No data found in 8-band collection")
|
||||||
|
print(f" Trying legacy 4-band collection: {old_id}")
|
||||||
|
byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True)
|
||||||
|
|
||||||
|
for test_date in date_range:
|
||||||
|
try:
|
||||||
|
search = catalog.search(
|
||||||
|
collection=byoc_old,
|
||||||
|
bbox=test_bbox,
|
||||||
|
time=(test_date, test_date),
|
||||||
|
filter=None
|
||||||
|
)
|
||||||
|
tiles = list(search)
|
||||||
|
if len(tiles) > 0:
|
||||||
|
print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
|
||||||
|
print(f" ✓ Using legacy 4-band collection")
|
||||||
|
return byoc_old, {
|
||||||
|
'collection_id': old_id,
|
||||||
|
'name': 'planet_data',
|
||||||
|
'bands': 4,
|
||||||
|
'output_folder': 'merged_tif',
|
||||||
|
'singles_folder': 'single_images'
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ⚠️ {test_date}: {str(e)[:60]}")
|
||||||
|
|
||||||
|
# Neither collection has data
|
||||||
|
print(f"\n ⚠️ No data found in either collection for {date_range_days} days")
|
||||||
|
print(f" Defaulting to 8-band collection (will attempt download anyway)")
|
||||||
|
return byoc_new, {
|
||||||
|
'collection_id': new_id,
|
||||||
|
'name': 'planet_data_8b',
|
||||||
|
'bands': 4,
|
||||||
|
'output_folder': 'merged_tif',
|
||||||
|
'singles_folder': 'single_images'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
|
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
EVALSCRIPT_4BAND_MASKED = """
|
EVALSCRIPT_8BAND = """
|
||||||
//VERSION=3
|
//VERSION=3
|
||||||
function setup() {
|
function setup() {
|
||||||
return {
|
return {
|
||||||
|
|
@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """
|
||||||
}
|
}
|
||||||
function evaluatePixel(sample) {
|
function evaluatePixel(sample) {
|
||||||
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
|
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
|
||||||
// This reduces output pixels and avoids NaN interpolation on client side
|
|
||||||
if (sample.udm1 == 0) {
|
if (sample.udm1 == 0) {
|
||||||
// Scale reflectance: DN → [0, 1] range
|
var scaledRed = 2.5 * sample.red / 10000;
|
||||||
|
var scaledGreen = 2.5 * sample.green / 10000;
|
||||||
|
var scaledBlue = 2.5 * sample.blue / 10000;
|
||||||
|
var scaledNIR = 2.5 * sample.nir / 10000;
|
||||||
|
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
|
||||||
|
} else {
|
||||||
|
return [NaN, NaN, NaN, NaN];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
EVALSCRIPT_4BAND_LEGACY = """
|
||||||
|
//VERSION=3
|
||||||
|
function setup() {
|
||||||
|
return {
|
||||||
|
input: [{
|
||||||
|
bands: ["red", "green", "blue", "nir", "udm1"],
|
||||||
|
units: "DN"
|
||||||
|
}],
|
||||||
|
output: {
|
||||||
|
bands: 4
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
function evaluatePixel(sample) {
|
||||||
|
// Cloud masking for legacy collection (same band names as new 8-band)
|
||||||
|
// udm1 = 0 means clear, non-zero means cloud/shadow/etc
|
||||||
|
if (sample.udm1 == 0) {
|
||||||
var scaledRed = 2.5 * sample.red / 10000;
|
var scaledRed = 2.5 * sample.red / 10000;
|
||||||
var scaledGreen = 2.5 * sample.green / 10000;
|
var scaledGreen = 2.5 * sample.green / 10000;
|
||||||
var scaledBlue = 2.5 * sample.blue / 10000;
|
var scaledBlue = 2.5 * sample.blue / 10000;
|
||||||
|
|
@ -289,6 +413,7 @@ def download_tile(
|
||||||
output_dir: Path,
|
output_dir: Path,
|
||||||
config,
|
config,
|
||||||
byoc,
|
byoc,
|
||||||
|
evalscript: str,
|
||||||
resolution: int = 3
|
resolution: int = 3
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Download a single full tile (no geometry masking = lower PU) with exponential backoff."""
|
"""Download a single full tile (no geometry masking = lower PU) with exponential backoff."""
|
||||||
|
|
@ -300,9 +425,9 @@ def download_tile(
|
||||||
try:
|
try:
|
||||||
size = bbox_to_dimensions(bbox, resolution=resolution)
|
size = bbox_to_dimensions(bbox, resolution=resolution)
|
||||||
|
|
||||||
# Create download request with 4-band cloud-masked evalscript (uint16)
|
# Create download request with appropriate evalscript for collection
|
||||||
request = SentinelHubRequest(
|
request = SentinelHubRequest(
|
||||||
evalscript=EVALSCRIPT_4BAND_MASKED,
|
evalscript=evalscript,
|
||||||
input_data=[
|
input_data=[
|
||||||
SentinelHubRequest.input_data(
|
SentinelHubRequest.input_data(
|
||||||
data_collection=byoc,
|
data_collection=byoc,
|
||||||
|
|
@ -350,6 +475,8 @@ def download_date(
|
||||||
base_path: Path,
|
base_path: Path,
|
||||||
config,
|
config,
|
||||||
byoc,
|
byoc,
|
||||||
|
evalscript: str,
|
||||||
|
collection_info: dict,
|
||||||
resolution: int = 3
|
resolution: int = 3
|
||||||
) -> int:
|
) -> int:
|
||||||
"""
|
"""
|
||||||
|
|
@ -357,14 +484,14 @@ def download_date(
|
||||||
Returns number of successfully downloaded tiles.
|
Returns number of successfully downloaded tiles.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
output_dir = base_path / 'single_images_8b' / date_str
|
output_dir = base_path / collection_info['singles_folder'] / date_str
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...")
|
print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...")
|
||||||
|
|
||||||
successful = 0
|
successful = 0
|
||||||
for idx, bbox in enumerate(bbox_list, 1):
|
for idx, bbox in enumerate(bbox_list, 1):
|
||||||
if download_tile(date_str, bbox, output_dir, config, byoc, resolution):
|
if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution):
|
||||||
successful += 1
|
successful += 1
|
||||||
|
|
||||||
percentage = (idx / len(bbox_list)) * 100
|
percentage = (idx / len(bbox_list)) * 100
|
||||||
|
|
@ -385,10 +512,10 @@ def download_date(
|
||||||
# MERGE FUNCTION
|
# MERGE FUNCTION
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def merge_tiles(date_str: str, base_path: Path) -> bool:
|
def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool:
|
||||||
"""Merge downloaded tiles into single GeoTIFF using GDAL."""
|
"""Merge downloaded tiles into single GeoTIFF using GDAL."""
|
||||||
|
|
||||||
single_images_dir = base_path / 'single_images_8b' / date_str
|
single_images_dir = base_path / collection_info['singles_folder'] / date_str
|
||||||
|
|
||||||
# Find all response.tiff files
|
# Find all response.tiff files
|
||||||
file_list = [str(p) for p in single_images_dir.rglob('response.tiff')]
|
file_list = [str(p) for p in single_images_dir.rglob('response.tiff')]
|
||||||
|
|
@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool:
|
||||||
print(f" ✗ No tiles found to merge")
|
print(f" ✗ No tiles found to merge")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
merged_tif_dir = base_path / 'merged_tif_8b'
|
merged_tif_dir = base_path / collection_info['output_folder']
|
||||||
merged_vrt_dir = base_path / 'merged_virtual_8b'
|
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
|
||||||
merged_tif_dir.mkdir(parents=True, exist_ok=True)
|
merged_tif_dir.mkdir(parents=True, exist_ok=True)
|
||||||
merged_vrt_dir.mkdir(parents=True, exist_ok=True)
|
merged_vrt_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
@ -453,9 +580,9 @@ def main():
|
||||||
|
|
||||||
# Parse arguments
|
# Parse arguments
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Download Planet 8-band imagery with PU optimization'
|
description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)'
|
||||||
)
|
)
|
||||||
parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)')
|
parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)')
|
||||||
parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
|
parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
|
||||||
parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
|
parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
|
||||||
parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
|
parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
|
||||||
|
|
@ -481,7 +608,7 @@ def main():
|
||||||
date_str = datetime.date.today().strftime('%Y-%m-%d')
|
date_str = datetime.date.today().strftime('%Y-%m-%d')
|
||||||
|
|
||||||
print(f"{'='*70}")
|
print(f"{'='*70}")
|
||||||
print(f"Planet 8-Band Download - PU Optimized")
|
print(f"Planet Download - Auto-Detecting Collection (PU Optimized)")
|
||||||
print(f"{'='*70}")
|
print(f"{'='*70}")
|
||||||
print(f"Project: {args.project}")
|
print(f"Project: {args.project}")
|
||||||
print(f"Date: {date_str}")
|
print(f"Date: {date_str}")
|
||||||
|
|
@ -489,7 +616,7 @@ def main():
|
||||||
|
|
||||||
# Setup SentinelHub
|
# Setup SentinelHub
|
||||||
print(f"\nSetting up SentinelHub...")
|
print(f"\nSetting up SentinelHub...")
|
||||||
config, byoc, catalog = setup_config()
|
config, catalog = setup_config()
|
||||||
print(f"✓ SentinelHub configured")
|
print(f"✓ SentinelHub configured")
|
||||||
|
|
||||||
# Load geometries
|
# Load geometries
|
||||||
|
|
@ -504,15 +631,26 @@ def main():
|
||||||
print(f"\n✗ No tiles intersect field geometries. Exiting.")
|
print(f"\n✗ No tiles intersect field geometries. Exiting.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Auto-detect collection and get evalscript
|
||||||
|
byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)
|
||||||
|
|
||||||
|
# Get appropriate evalscript
|
||||||
|
evalscript = EVALSCRIPT_8BAND if collection_info['bands'] == 4 and 'new' not in collection_info.get('note', '') else EVALSCRIPT_8BAND
|
||||||
|
if '4e56d0cb' not in collection_info['collection_id']:
|
||||||
|
evalscript = EVALSCRIPT_4BAND_LEGACY
|
||||||
|
|
||||||
|
print(f"\n Collection: {collection_info['name']}")
|
||||||
|
print(f" Output folder: {collection_info['output_folder']}/")
|
||||||
|
|
||||||
# Check date availability
|
# Check date availability
|
||||||
print(f"\nChecking data availability...")
|
print(f"\nChecking data availability for {date_str}...")
|
||||||
if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
|
if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
|
||||||
print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
|
print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Download tiles
|
# Download tiles
|
||||||
print(f"\n{'='*70}")
|
print(f"\n{'='*70}")
|
||||||
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution)
|
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)
|
||||||
|
|
||||||
if downloaded == 0:
|
if downloaded == 0:
|
||||||
print(f"\n✗ No tiles downloaded. Exiting.")
|
print(f"\n✗ No tiles downloaded. Exiting.")
|
||||||
|
|
@ -522,20 +660,20 @@ def main():
|
||||||
if not args.skip_merge:
|
if not args.skip_merge:
|
||||||
print(f"\n{'='*70}")
|
print(f"\n{'='*70}")
|
||||||
print(f"Merging tiles...")
|
print(f"Merging tiles...")
|
||||||
if merge_tiles(date_str, base_path):
|
if merge_tiles(date_str, base_path, collection_info):
|
||||||
print(f"✓ Merge complete")
|
print(f"✓ Merge complete")
|
||||||
|
|
||||||
# Cleanup intermediate files
|
# Cleanup intermediate files
|
||||||
if args.cleanup:
|
if args.cleanup:
|
||||||
print(f"\nCleaning up intermediate files...")
|
print(f"\nCleaning up intermediate files...")
|
||||||
import shutil
|
import shutil
|
||||||
single_images_dir = base_path / 'single_images_8b' / date_str
|
single_images_dir = base_path / collection_info['singles_folder'] / date_str
|
||||||
merged_vrt_dir = base_path / 'merged_virtual_8b'
|
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if single_images_dir.exists():
|
if single_images_dir.exists():
|
||||||
shutil.rmtree(single_images_dir)
|
shutil.rmtree(single_images_dir)
|
||||||
print(f" ✓ Deleted {single_images_dir.name}/{date_str}")
|
print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}")
|
||||||
|
|
||||||
# Clean old VRT files
|
# Clean old VRT files
|
||||||
for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
|
for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
|
||||||
|
|
@ -549,7 +687,7 @@ def main():
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
print(f"\n{'='*70}")
|
||||||
print(f"✓ Done!")
|
print(f"✓ Done!")
|
||||||
print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}")
|
print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}")
|
||||||
print(f"{'='*70}")
|
print(f"{'='*70}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -98,9 +98,9 @@ byoc = DataCollection.define_byoc(
|
||||||
def setup_paths(project):
|
def setup_paths(project):
|
||||||
"""Create and return folder paths."""
|
"""Create and return folder paths."""
|
||||||
BASE_PATH = Path('../laravel_app/storage/app') / project
|
BASE_PATH = Path('../laravel_app/storage/app') / project
|
||||||
BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images_8b')
|
BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images')
|
||||||
folder_for_merged_tifs = str(BASE_PATH / 'merged_tif_8b')
|
folder_for_merged_tifs = str(BASE_PATH / 'merged_tif')
|
||||||
folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual_8b')
|
folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual')
|
||||||
geojson_file = Path(BASE_PATH / 'Data' / 'pivot.geojson')
|
geojson_file = Path(BASE_PATH / 'Data' / 'pivot.geojson')
|
||||||
|
|
||||||
# Create folders if missing
|
# Create folders if missing
|
||||||
|
|
|
||||||
356
r_app/00_common_utils.R
Normal file
356
r_app/00_common_utils.R
Normal file
|
|
@ -0,0 +1,356 @@
|
||||||
|
# ==============================================================================
|
||||||
|
# # 00_COMMON_UTILS.R
|
||||||
|
# ==============================================================================
|
||||||
|
# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
|
||||||
|
# #
|
||||||
|
# # PURPOSE:
|
||||||
|
# # Centralized location for foundational utilities used across multiple scripts.
|
||||||
|
# # These functions have NO project knowledge, NO client-type dependencies,
|
||||||
|
# # NO domain-specific logic.
|
||||||
|
# #
|
||||||
|
# # USAGE:
|
||||||
|
# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
|
||||||
|
# #
|
||||||
|
# # source(here::here("r_app", "parameters_project.R")) # Config first
|
||||||
|
# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities
|
||||||
|
# #
|
||||||
|
# # FUNCTIONS:
|
||||||
|
# # 1. safe_log() — Generic logging with [LEVEL] prefix
|
||||||
|
# # 2. smartcane_debug() — Conditional debug logging
|
||||||
|
# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages
|
||||||
|
# # 4. date_list() — Generate date sequences for processing windows
|
||||||
|
# # 5. load_field_boundaries() — Load field geometries from GeoJSON
|
||||||
|
# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects
|
||||||
|
# #
|
||||||
|
# # DATE FUNCTIONS (now in parameters_project.R):
|
||||||
|
# # - get_iso_week() — Extract ISO week number from date
|
||||||
|
# # - get_iso_year() — Extract ISO year from date
|
||||||
|
# # - get_iso_week_year() — Extract both ISO week and year as list
|
||||||
|
# # - format_week_label() — Format date as week/year label
|
||||||
|
# # - load_harvesting_data() — Load harvest schedule from Excel
|
||||||
|
# #
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# # Source parameters first to get shared date utility functions
|
||||||
|
# source("parameters_project.R")
|
||||||
|
|
||||||
|
# #' Safe Logging Function
|
||||||
|
# #'
|
||||||
|
# #' Generic logging with [LEVEL] prefix. Works standalone without any framework.
|
||||||
|
# #' Consistent with SmartCane logging standard.
|
||||||
|
# #'
|
||||||
|
# #' @param message The message to log
|
||||||
|
# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
|
||||||
|
# #' @return NULL (invisible, used for side effects)
|
||||||
|
# #'
|
||||||
|
# #' @examples
|
||||||
|
# #' safe_log("Processing started", "INFO")
|
||||||
|
# #' safe_log("Check input file", "WARNING")
|
||||||
|
# #' safe_log("Failed to load data", "ERROR")
|
||||||
|
# #'
|
||||||
|
# safe_log <- function(message, level = "INFO") {
|
||||||
|
# prefix <- sprintf("[%s]", level)
|
||||||
|
# cat(sprintf("%s %s\n", prefix, message))
|
||||||
|
# }
|
||||||
|
|
||||||
|
# #' SmartCane Debug Logging (Conditional)
|
||||||
|
# #'
|
||||||
|
# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
|
||||||
|
# #' Useful for development/troubleshooting without cluttering normal output.
|
||||||
|
# #'
|
||||||
|
# #' @param message The message to log
|
||||||
|
# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
|
||||||
|
# #' @return NULL (invisible, used for side effects)
|
||||||
|
# #'
|
||||||
|
# #' @examples
|
||||||
|
# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
|
||||||
|
# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
|
||||||
|
# #'
|
||||||
|
# smartcane_debug <- function(message, verbose = FALSE) {
|
||||||
|
# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
|
||||||
|
# return(invisible(NULL))
|
||||||
|
# }
|
||||||
|
# safe_log(message, level = "DEBUG")
|
||||||
|
# }
|
||||||
|
|
||||||
|
# #' SmartCane Warning Logging
|
||||||
|
# #'
|
||||||
|
# #' Logs WARN-level messages. Convenience wrapper around safe_log().
|
||||||
|
# #'
|
||||||
|
# #' @param message The message to log
|
||||||
|
# #' @return NULL (invisible, used for side effects)
|
||||||
|
# #'
|
||||||
|
# #' @examples
|
||||||
|
# #' smartcane_warn("Check data format before proceeding")
|
||||||
|
# #'
|
||||||
|
# smartcane_warn <- function(message) {
|
||||||
|
# safe_log(message, level = "WARN")
|
||||||
|
# }
|
||||||
|
|
||||||
|
# #' Load Field Boundaries from GeoJSON
|
||||||
|
# #'
|
||||||
|
# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
|
||||||
|
# #' Handles CRS validation and column standardization.
|
||||||
|
# #'
|
||||||
|
# #' @param data_dir Directory containing GeoJSON file
|
||||||
|
# #' @return List with elements:
|
||||||
|
# #' - field_boundaries_sf: sf (Simple Features) object
|
||||||
|
# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
|
||||||
|
# #'
|
||||||
|
# #' @details
|
||||||
|
# #' Automatically selects pivot_2.geojson for ESA project during CI extraction,
|
||||||
|
# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
|
||||||
|
# #'
|
||||||
|
# #' @examples
|
||||||
|
# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
|
||||||
|
# #' head(boundaries$field_boundaries_sf)
|
||||||
|
# #'
|
||||||
|
# load_field_boundaries <- function(data_dir) {
|
||||||
|
# # Choose field boundaries file based on project and script type
|
||||||
|
# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
|
||||||
|
# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
|
||||||
|
# use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
|
||||||
|
# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
|
||||||
|
|
||||||
|
# if (use_pivot_2) {
|
||||||
|
# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
|
||||||
|
# } else {
|
||||||
|
# field_boundaries_path <- file.path(data_dir, "pivot.geojson")
|
||||||
|
# }
|
||||||
|
|
||||||
|
# if (!file.exists(field_boundaries_path)) {
|
||||||
|
# stop(paste("Field boundaries file not found at path:", field_boundaries_path))
|
||||||
|
# }
|
||||||
|
|
||||||
|
# tryCatch({
|
||||||
|
# # Read GeoJSON with explicit CRS handling
|
||||||
|
# field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
|
||||||
|
|
||||||
|
# # Remove OBJECTID column immediately if it exists
|
||||||
|
# if ("OBJECTID" %in% names(field_boundaries_sf)) {
|
||||||
|
# field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.)
|
||||||
|
# # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.)
|
||||||
|
# # to prevent S2 geometry validation errors during downstream processing
|
||||||
|
# field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)
|
||||||
|
|
||||||
|
# # Validate and fix CRS if needed
|
||||||
|
# tryCatch({
|
||||||
|
# # Simply assign WGS84 if not already set (safe approach)
|
||||||
|
# if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
|
||||||
|
# st_crs(field_boundaries_sf) <- 4326
|
||||||
|
# warning("CRS was missing, assigned WGS84 (EPSG:4326)")
|
||||||
|
# }
|
||||||
|
# }, error = function(e) {
|
||||||
|
# tryCatch({
|
||||||
|
# st_crs(field_boundaries_sf) <<- 4326
|
||||||
|
# }, error = function(e2) {
|
||||||
|
# warning(paste("Could not set CRS:", e2$message))
|
||||||
|
# })
|
||||||
|
# })
|
||||||
|
|
||||||
|
# # Handle column names - accommodate optional sub_area column
|
||||||
|
# if ("sub_area" %in% names(field_boundaries_sf)) {
|
||||||
|
# field_boundaries_sf <- field_boundaries_sf %>%
|
||||||
|
# dplyr::select(field, sub_field, sub_area) %>%
|
||||||
|
# sf::st_set_geometry("geometry")
|
||||||
|
# } else {
|
||||||
|
# field_boundaries_sf <- field_boundaries_sf %>%
|
||||||
|
# dplyr::select(field, sub_field) %>%
|
||||||
|
# sf::st_set_geometry("geometry")
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # Convert to terra vector if possible, otherwise use sf
|
||||||
|
# field_boundaries <- tryCatch({
|
||||||
|
# field_boundaries_terra <- terra::vect(field_boundaries_sf)
|
||||||
|
# crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
|
||||||
|
# crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
|
||||||
|
|
||||||
|
# if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
|
||||||
|
# terra::crs(field_boundaries_terra) <- "EPSG:4326"
|
||||||
|
# warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
|
||||||
|
# }
|
||||||
|
# field_boundaries_terra
|
||||||
|
|
||||||
|
# }, error = function(e) {
|
||||||
|
# warning(paste("Terra conversion failed, using sf object instead:", e$message))
|
||||||
|
# field_boundaries_sf
|
||||||
|
# })
|
||||||
|
|
||||||
|
# return(list(
|
||||||
|
# field_boundaries_sf = field_boundaries_sf,
|
||||||
|
# field_boundaries = field_boundaries
|
||||||
|
# ))
|
||||||
|
# }, error = function(e) {
|
||||||
|
# cat("[DEBUG] Error in load_field_boundaries:\n")
|
||||||
|
# cat(" Message:", e$message, "\n")
|
||||||
|
# cat(" Call:", deparse(e$call), "\n")
|
||||||
|
# stop(paste("Error loading field boundaries:", e$message))
|
||||||
|
# })
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# #' Generate a Sequence of Dates for Processing
|
||||||
|
# #'
|
||||||
|
# #' Creates a date range from start_date to end_date and extracts week/year info.
|
||||||
|
# #' Used by Scripts 20, 30, 40 to determine data processing windows.
|
||||||
|
# #'
|
||||||
|
# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
|
||||||
|
# #' @param offset Number of days to look back from end_date (e.g., 7 for one week)
|
||||||
|
# #' @return A list containing:
|
||||||
|
# #' - week: ISO week number of start_date
|
||||||
|
# #' - year: ISO year of start_date
|
||||||
|
# #' - days_filter: Vector of dates in "YYYY-MM-DD" format
|
||||||
|
# #' - start_date: Start date as Date object
|
||||||
|
# #' - end_date: End date as Date object
|
||||||
|
# #'
|
||||||
|
# #' @details
|
||||||
|
# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return
|
||||||
|
# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations,
|
||||||
|
# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead.
|
||||||
|
# #'
|
||||||
|
# #' @examples
|
||||||
|
# #' dates <- date_list(as.Date("2025-01-15"), offset = 7)
|
||||||
|
# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
|
||||||
|
# #'
|
||||||
|
# #' dates <- date_list("2025-12-31", offset = 14)
|
||||||
|
# #' # Handles string input and returns 14 days of data
|
||||||
|
# #'
|
||||||
|
# date_list <- function(end_date, offset) {
|
||||||
|
# # Input validation
|
||||||
|
# if (!lubridate::is.Date(end_date)) {
|
||||||
|
# end_date <- as.Date(end_date)
|
||||||
|
# if (is.na(end_date)) {
|
||||||
|
# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
|
||||||
|
# offset <- as.numeric(offset)
|
||||||
|
# if (is.na(offset) || offset < 1) {
|
||||||
|
# stop("Invalid offset provided. Expected a positive number.")
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # Calculate date range
|
||||||
|
# offset <- offset - 1 # Adjust offset to include end_date
|
||||||
|
# start_date <- end_date - lubridate::days(offset)
|
||||||
|
|
||||||
|
# # Extract ISO week and year information (from END date for reporting period)
|
||||||
|
# week <- lubridate::isoweek(end_date)
|
||||||
|
# year <- lubridate::isoyear(end_date)
|
||||||
|
|
||||||
|
# # Generate sequence of dates
|
||||||
|
# days_filter <- seq(from = start_date, to = end_date, by = "day")
|
||||||
|
# days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
|
||||||
|
|
||||||
|
# # Log the date range
|
||||||
|
# safe_log(paste("Date range generated from", start_date, "to", end_date))
|
||||||
|
|
||||||
|
# return(list(
|
||||||
|
# "week" = week,
|
||||||
|
# "year" = year,
|
||||||
|
# "days_filter" = days_filter,
|
||||||
|
# "start_date" = start_date,
|
||||||
|
# "end_date" = end_date
|
||||||
|
# ))
|
||||||
|
# }
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# #' Repair Invalid GeoJSON Geometries
|
||||||
|
# #'
|
||||||
|
# #' Fixes common geometry issues in GeoJSON/sf objects:
|
||||||
|
# #' - Degenerate vertices (duplicate points)
|
||||||
|
# #' - Self-intersecting polygons
|
||||||
|
# #' - Invalid ring orientation
|
||||||
|
# #' - Empty or NULL geometries
|
||||||
|
# #'
|
||||||
|
# #' Uses sf::st_make_valid() with buffer trick as fallback.
|
||||||
|
# #'
|
||||||
|
# #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
|
||||||
|
# #' @return sf object with repaired geometries
|
||||||
|
# #'
|
||||||
|
# #' @details
|
||||||
|
# #' **Why this matters:**
|
||||||
|
# #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
|
||||||
|
# #' rings from manual editing or GIS data sources. These cause errors when using
|
||||||
|
# #' S2 geometry (strict validation) during cropping operations.
|
||||||
|
# #'
|
||||||
|
# #' **Repair strategy (priority order):**
|
||||||
|
# #' 1. Try st_make_valid() - GEOS-based repair (most reliable)
|
||||||
|
# #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity
|
||||||
|
# #' 3. Last resort: Silently keep original if repair fails
|
||||||
|
# #'
|
||||||
|
# #' @examples
|
||||||
|
# #' \dontrun{
|
||||||
|
# #' fields <- st_read("pivot.geojson")
|
||||||
|
# #' fields_fixed <- repair_geojson_geometries(fields)
|
||||||
|
# #' cat(paste("Fixed geometries: before=",
|
||||||
|
# #' nrow(fields[!st_is_valid(fields), ]),
|
||||||
|
# #' ", after=",
|
||||||
|
# #' nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
|
||||||
|
# #' }
|
||||||
|
# #'
|
||||||
|
# repair_geojson_geometries <- function(sf_object) {
|
||||||
|
# if (!inherits(sf_object, "sf")) {
|
||||||
|
# stop("Input must be an sf (Simple Features) object")
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # Count invalid geometries BEFORE repair
|
||||||
|
# invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)
|
||||||
|
|
||||||
|
# if (invalid_before == 0) {
|
||||||
|
# safe_log("All geometries already valid - no repair needed", "INFO")
|
||||||
|
# return(sf_object)
|
||||||
|
# }
|
||||||
|
|
||||||
|
# safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")
|
||||||
|
|
||||||
|
# # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS)
|
||||||
|
# # This handles degenerate vertices, self-intersections, invalid rings while preserving all features
|
||||||
|
# repaired <- tryCatch({
|
||||||
|
# # st_make_valid() on entire sf object preserves all features and attributes
|
||||||
|
# repaired_geom <- sf::st_make_valid(sf_object)
|
||||||
|
|
||||||
|
# # Verify we still have the same number of rows
|
||||||
|
# if (nrow(repaired_geom) != nrow(sf_object)) {
|
||||||
|
# warning("st_make_valid() changed number of features - attempting row-wise repair")
|
||||||
|
|
||||||
|
# # Fallback: Repair row-by-row to maintain original structure
|
||||||
|
# repaired_geom <- sf_object
|
||||||
|
# for (i in seq_len(nrow(sf_object))) {
|
||||||
|
# tryCatch({
|
||||||
|
# if (!sf::st_is_valid(sf_object[i, ])) {
|
||||||
|
# repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
|
||||||
|
# }
|
||||||
|
# }, error = function(e) {
|
||||||
|
# safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
|
||||||
|
# })
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
|
||||||
|
# safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
|
||||||
|
# repaired_geom
|
||||||
|
# }, error = function(e) {
|
||||||
|
# safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
|
||||||
|
# NULL
|
||||||
|
# })
|
||||||
|
|
||||||
|
# # If repair failed, keep original
|
||||||
|
# if (is.null(repaired)) {
|
||||||
|
# safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
|
||||||
|
# "WARNING")
|
||||||
|
# return(sf_object)
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # Count invalid geometries AFTER repair
|
||||||
|
# invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
|
||||||
|
# safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")
|
||||||
|
|
||||||
|
# return(repaired)
|
||||||
|
# }
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# # END 00_COMMON_UTILS.R
|
||||||
|
# ==============================================================================
|
||||||
|
|
@ -1,499 +0,0 @@
|
||||||
#' Combined: Create master grid and split TIFFs into tiles
|
|
||||||
#' ====================================================================
|
|
||||||
#'
|
|
||||||
#' Purpose:
|
|
||||||
#' 1. Check all daily TIFFs for matching extents
|
|
||||||
#' 2. Create master 5×5 grid covering all TIFFs
|
|
||||||
#' 3. Split each daily TIFF into 25 tiles using the master grid
|
|
||||||
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
|
|
||||||
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
|
|
||||||
|
|
||||||
|
|
||||||
library(terra)
|
|
||||||
library(sf)
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CONFIGURATION & COMMAND-LINE ARGUMENTS
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Parse command-line arguments for date filtering
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
args <- commandArgs(trailingOnly = TRUE)

# Parse one CLI date argument; stop with a clear message instead of letting
# as.Date() throw its generic "unambiguous format" error (or silently yield NA).
parse_cli_date <- function(raw, label) {
  parsed <- tryCatch(as.Date(raw), error = function(e) as.Date(NA))
  if (is.na(parsed)) {
    stop("Invalid ", label, " '", raw, "'. Expected YYYY-MM-DD.")
  }
  parsed
}

# NULL means "no bound on this side of the range".
start_date <- NULL
end_date <- NULL

if (length(args) >= 1) {
  start_date <- parse_cli_date(args[1], "start date")
  cat("Filtering: start date =", as.character(start_date), "\n")
}

if (length(args) >= 2) {
  end_date <- parse_cli_date(args[2], "end date")
  cat("Filtering: end date =", as.character(end_date), "\n")
}
|
|
||||||
|
|
||||||
# Project to process; all storage paths below derive from this.
PROJECT <- "angata"
# Source folder: one merged 8-band GeoTIFF per day, named YYYY-MM-DD.tif.
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")

# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
# NOTE(review): step [4] later auto-overrides GRID_NROWS/GRID_NCOLS for small
# ROIs, but GRID_SIZE_LABEL and OUTPUT_FOLDER below are computed from these
# defaults and never recomputed — confirm that mismatch is intended.
GRID_NROWS <- 5
GRID_NCOLS <- 5

# Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)

# Field boundaries (GeoJSON) used for tile/field overlap checking.
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")

cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
cat("\n[PART 1] Creating Master Grid\n")

# Load field boundaries for overlap checking
cat("\n[1] Checking for existing master grid...\n")

# Check if master grid already exists.
# A saved grid is already filtered to field-overlapping tiles, so the pivot
# GeoJSON does not need to be loaded in that case.
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))

if (file.exists(MASTER_GRID_PATH)) {
  cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
  master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
  field_boundaries_sf <- NULL # No need to load pivot.geojson
  field_boundaries_vect <- NULL

  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")

} else {
  # No existing grid - need to create one from pivot.geojson
  cat(" No existing grid found. Creating new one from pivot.geojson...\n")

  if (!file.exists(GEOJSON_PATH)) {
    stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
         "Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
         "from the same directory as a previous successful run (grid already exists).")
  }

  # Both representations are kept: sf for bbox-based filtering,
  # terra::vect for raster operations.
  field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
  field_boundaries_vect <- terra::vect(GEOJSON_PATH)

  cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
}
|
|
||||||
|
|
||||||
# Report which fields were loaded. The name column is not standardized across
# projects, so try the known candidate columns in priority order and fall
# back to row indices when none is present. Skipped when the grid was loaded
# from disk (field_boundaries_sf is NULL in that case).
if (!is.null(field_boundaries_sf)) {
  candidate_cols <- c("name", "field", "field_name")
  present_cols <- candidate_cols[candidate_cols %in% names(field_boundaries_sf)]

  field_names <- if (length(present_cols) > 0) {
    field_boundaries_sf[[present_cols[1]]]
  } else {
    1:nrow(field_boundaries_sf) # Fall back to indices
  }

  cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
}
|
|
||||||
|
|
||||||
# Return TRUE when `tile_extent` (an st_bbox) overlaps the bounding box of at
# least one geometry in `field_geoms`. Only axis-aligned bbox comparisons are
# used — no exact geometry intersection — so this is fast but conservative.
# On any error the tile is treated as overlapping (better a spare tile than a
# missing one).
tile_overlaps_fields <- function(tile_extent, field_geoms) {
  tryCatch({
    # Two axis-aligned boxes are disjoint exactly when one lies entirely to
    # the left/right of, or above/below, the other; overlap is the negation.
    for (geom_idx in seq_along(field_geoms)) {
      # Skip empty geometries
      if (st_is_empty(field_geoms[geom_idx])) {
        next
      }

      field_bbox <- st_bbox(field_geoms[geom_idx])

      separated_x <- tile_extent$xmax < field_bbox$xmin || tile_extent$xmin > field_bbox$xmax
      separated_y <- tile_extent$ymax < field_bbox$ymin || tile_extent$ymin > field_bbox$ymax

      if (!separated_x && !separated_y) {
        return(TRUE) # Found overlap!
      }
    }

    FALSE # No overlap with any field
  }, error = function(e) {
    cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
    TRUE # Default to including tile if there's an error
  })
}
|
|
||||||
|
|
||||||
cat("\n[2] Checking TIFF extents...\n")

# Daily TIFFs, expected to be named YYYY-MM-DD.tif.
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)

# Filter by date range if specified on the command line.
if (!is.null(start_date) || !is.null(end_date)) {
  cat("\nApplying date filter...\n")

  file_dates <- as.Date(sub("\\.tif$", "", tiff_files))

  if (!is.null(start_date) && !is.null(end_date)) {
    keep_idx <- file_dates >= start_date & file_dates <= end_date
    cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
  } else if (!is.null(start_date)) {
    keep_idx <- file_dates >= start_date
    cat(" From: ", as.character(start_date), "\n", sep = "")
  } else {
    keep_idx <- file_dates <= end_date
    cat(" Until: ", as.character(end_date), "\n", sep = "")
  }

  # FIX: files whose names are not parseable dates produce NA in file_dates,
  # and NA in a logical subscript injects NA entries into tiff_files, which
  # crashes the extent-loading loop. Explicitly exclude them (NA & FALSE is
  # FALSE in R, so this also cleans NA comparisons above).
  keep_idx <- keep_idx & !is.na(file_dates)

  tiff_files <- tiff_files[keep_idx]
  cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
}

if (length(tiff_files) == 0) {
  stop("No TIFF files found in ", TIFF_FOLDER)
}

cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")
|
|
||||||
cat(" Checking extents... (this may take a while)\n")
|
|
||||||
|
|
||||||
# Read the spatial extent of every TIFF once, upfront; these are compared
# below to decide whether a shared master extent must be built.
extents <- vector("list", length(tiff_files))
for (file_num in seq_along(tiff_files)) {
  current_path <- file.path(TIFF_FOLDER, tiff_files[file_num])
  current_raster <- terra::rast(current_path)
  extents[[file_num]] <- terra::ext(current_raster)

  # Progress indicator every 50 files
  if (file_num %% 50 == 0) {
    cat(" Checked ", file_num, "/", length(tiff_files), " files\n", sep = "")
  }
}

cat(" ✓ All extents loaded\n")
|
|
||||||
|
|
||||||
# Check if all extents match (within a small absolute tolerance).
cat("\n[3] Comparing extents...\n")

tolerance <- 1e-8
all_match <- TRUE
first_ext <- extents[[1]]

# FIX: the original loop used `2:length(extents)`, which for a single file
# evaluates to c(2, 1) and indexes extents[[2]] out of bounds.
# seq_len(...)[-1] is empty in that case, so one file trivially matches.
for (i in seq_len(length(extents))[-1]) {
  curr_ext <- extents[[i]]
  match <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
    abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
    abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
    abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )
  if (!match) {
    all_match <- FALSE
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}

if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}
|
|
||||||
|
|
||||||
# Create master extent: the smallest box covering every TIFF's extent.
cat("\n[4] Creating master extent...\n")

master_xmin <- min(sapply(extents, function(e) e$xmin))
master_xmax <- max(sapply(extents, function(e) e$xmax))
master_ymin <- min(sapply(extents, function(e) e$ymin))
master_ymax <- max(sapply(extents, function(e) e$ymax))

# Rough degree->metre conversion (~111,320 m per degree). Assumes lon/lat
# coordinates and ignores the latitude shrink of longitude — adequate for
# the coarse size thresholds below, not for precise measurement.
x_range_m <- (master_xmax - master_xmin) * 111320
y_range_m <- (master_ymax - master_ymin) * 111320

cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
    "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")

# Auto-determine grid size based on ROI dimensions.
# NOTE(review): this overrides GRID_NROWS/GRID_NCOLS *after* GRID_SIZE_LABEL
# and OUTPUT_FOLDER were derived from the 5x5 defaults, so a 1x1 run still
# writes under daily_tiles_split/5x5/ and names its grid master_grid_5x5.
# Confirm this mismatch is intended.
if (x_range_m < 10000 && y_range_m < 10000) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
} else {
  cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n")
  GRID_NROWS <- 5
  GRID_NCOLS <- 5
}

# Total number of tile positions in the unfiltered grid.
N_TILES <- GRID_NROWS * GRID_NCOLS
|
|
||||||
|
|
||||||
# Check if master grid already exists.
# NOTE(review): this re-checks the same path already tested in step [1]
# (master_grid_file equals MASTER_GRID_PATH). Confirm the duplication is
# deliberate — this pass additionally builds the SpatVector representation.
cat("\n[5] Checking if master grid exists...\n")

master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))

if (file.exists(master_grid_file)) {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  cat(" Grid does not exist. Creating new master grid...\n")

  # Create 5×5 grid
  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")

  # Bounding box of the master extent, declared as WGS84 (EPSG:4326).
  master_bbox <- st_bbox(c(
    xmin = master_xmin,
    xmax = master_xmax,
    ymin = master_ymin,
    ymax = master_ymax
  ), crs = 4326)

  bbox_sf <- st_as_sfc(master_bbox)

  # Regular rectangular polygon grid covering the whole bbox.
  master_grid <- st_make_grid(
    bbox_sf,
    n = c(GRID_NCOLS, GRID_NROWS),
    what = "polygons"
  )

  # Zero-padded sequential tile ids ("01", "02", ...).
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", 1:length(master_grid)),
    geometry = master_grid
  )

  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")

  # Convert to SpatVector for use in makeTiles
  master_grid_vect <- terra::vect(master_grid_sf)

  # Save master grid so later runs (and other scripts) can reuse it.
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}
|
|
||||||
|
|
||||||
# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================

cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")

# If grid was loaded from file, it's already filtered. Skip filtering.
if (!file.exists(MASTER_GRID_PATH)) {
  cat("\n[7] Filtering master grid to only overlapping tiles...\n")

  # Check which tiles overlap with any field (bbox-level test only — see
  # tile_overlaps_fields above).
  overlapping_tile_indices <- c()
  for (tile_idx in 1:nrow(master_grid_sf)) {
    tile_geom <- master_grid_sf[tile_idx, ]

    # Check overlap with any field
    if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
      overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
    }
  }

  cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
  cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")

  # Create filtered grid with only overlapping tiles. Tile ids keep the
  # original grid positions, so e.g. tile "07" means the same cell in both.
  # NOTE(review): if NO tile overlaps any field this grid is empty and the
  # tiling step below will have nothing to cut — confirm inputs always
  # intersect the field boundaries.
  filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
  filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
} else {
  cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
  # Grid was already loaded - it's already filtered
  filtered_grid_sf <- master_grid_sf
}

# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)

cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
|
|
||||||
|
|
||||||
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================

cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")

# Running counters for the final summary.
total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0

for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  # The date string is the file name minus the .tif suffix (YYYY-MM-DD).
  date_str <- gsub("\\.tif$", "", tiff_file)

  # Create date-specific output folder
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)

  # CHECK: Skip if date already processed (RESUME-SAFE)
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]

    if (length(existing_tiles) > 0) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
          " (", length(existing_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next # Skip this date
    }
  }

  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1

  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)

  # dim() gives (rows, cols, layers); printed as width × height.
  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")

  # Create date-specific output folder
  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }

  cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")

  # Use makeTiles with FILTERED grid (only overlapping tiles)
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )

  # Rename tiles to [DATE]_[TILE_ID].tif
  # NOTE(review): assumes makeTiles() writes tile1.tif, tile2.tif, ... in the
  # same order as filtered_grid_vect rows — confirm against the terra
  # documentation for the installed version.
  for (tile_idx in seq_along(tiles_list)) {
    source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
    tile_id <- filtered_grid_sf$tile_id[tile_idx]
    final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))

    if (file.exists(source_file)) {
      file.rename(source_file, final_file)
    }
  }

  cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + length(tiles_list)
}
|
|
||||||
|
|
||||||
# ============================================================================
# VERIFICATION
# ============================================================================

cat("\n[9] Verifying output...\n")

# Count the per-date tiles actually on disk, one line per date folder.
subdirs <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
subdirs <- sort(subdirs[subdirs != "."])

total_tile_files <- 0
for (day_dir in subdirs) {
  day_tiles <- list.files(file.path(OUTPUT_FOLDER, day_dir),
                          pattern = "\\.tif$")
  # Master-grid companion files are not per-date tiles; exclude them.
  day_tiles <- day_tiles[!grepl("master_grid", day_tiles)]
  total_tile_files <- total_tile_files + length(day_tiles)
  cat(" ", day_dir, ": ", length(day_tiles), " tiles\n", sep = "")
}
|
|
||||||
|
|
||||||
# ============================================================================
# SUMMARY
# ============================================================================
# Purely informational console report; no state is changed here.

cat("\n\n========== SUMMARY ==========\n")

cat("\nGrid Configuration:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")

cat("\nField Filtering:\n")
cat(" - Field boundaries loaded from pivot.geojson\n")
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
cat(" - Significant storage savings for sparse fields!\n")

cat("\nProcessing Summary:\n")
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
# Average only makes sense when at least one new date was tiled this run
# (also avoids division by zero).
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}

cat("\nDirectory Structure:\n")
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat(" ├── 2024-01-15/\n")
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
cat(" │ ├── 2024-01-15_05.tif\n")
cat(" │ └── ...\n")
cat(" ├── 2024-01-16/\n")
cat(" │ └── ...\n")
cat(" └── ...\n")

cat("\n⭐ Key Benefits:\n")
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
|
|
||||||
|
|
||||||
# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# This metadata file is read by parameters_project.R to determine mosaic mode
# It allows script 40 to know what script 10 decided without re-computing

cat("\n[10] Writing tiling configuration metadata...\n")

config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")
# JSON is assembled by hand (no jsonlite dependency); tolower() turns R's
# logical "TRUE"/"FALSE" into JSON's true/false.
config_json <- paste0(
  '{\n',
  ' "project": "', PROJECT, '",\n',
  ' "has_tiles": ', tolower(N_TILES > 1), ',\n',
  ' "grid_size": "', GRID_SIZE_LABEL, '",\n',
  ' "grid_rows": ', GRID_NROWS, ',\n',
  ' "grid_cols": ', GRID_NCOLS, ',\n',
  ' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n',
  ' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n',
  ' "created_date": "', Sys.Date(), '",\n',
  ' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n',
  '}\n'
)

writeLines(config_json, config_file)
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")

cat("\n✓ Script complete!\n")
|
|
||||||
165
r_app/10_create_per_field_tiffs.R
Normal file
165
r_app/10_create_per_field_tiffs.R
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
# ============================================================================
|
||||||
|
# SCRIPT 10: Create Per-Field TIFFs (Data Organization & Splitting)
|
||||||
|
# ============================================================================
|
||||||
|
# PURPOSE:
|
||||||
|
# Split full-farm satellite TIFFs into per-field file structure. Supports
|
||||||
|
# two phases: legacy data migration and ongoing new downloads. Transforms
|
||||||
|
# single large-file architecture into per-field directory structure for
|
||||||
|
# clean aggregation in downstream scripts (Script 20, 40, 80/90).
|
||||||
|
#
|
||||||
|
# INPUT DATA:
|
||||||
|
# - Source: laravel_app/storage/app/{project}/merged_tif/ or merged_final_tif/
|
||||||
|
# - Format: GeoTIFF (4-band RGB+NIR or 5-band with CI)
|
||||||
|
# - Naming: {YYYY-MM-DD}.tif (full farm mosaic)
|
||||||
|
#
|
||||||
|
# OUTPUT DATA:
|
||||||
|
# - Destination: laravel_app/storage/app/{project}/field_tiles/
|
||||||
|
# - Format: GeoTIFF (4-band RGB+NIR, same as input)
|
||||||
|
# - Structure: field_tiles/{FIELD}/{YYYY-MM-DD}.tif
|
||||||
|
# - Naming: Per-field GeoTIFFs organized by field and date
|
||||||
|
#
|
||||||
|
# USAGE:
|
||||||
|
# Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset]
|
||||||
|
#
|
||||||
|
# Example (Windows PowerShell):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
|
||||||
|
# - end_date: End date for processing (YYYY-MM-DD format, default: today)
|
||||||
|
# - offset: Days to look back (numeric, default: 7)
|
||||||
|
#
|
||||||
|
# CLIENT TYPES:
|
||||||
|
# - cane_supply (ANGATA): Yes - primary data organization script
|
||||||
|
# - agronomic_support (AURA): Yes - supports field-level analysis
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - Packages: terra, sf, tidyverse
|
||||||
|
# - Utils files: parameters_project.R, 00_common_utils.R, 10_create_per_field_tiffs_utils.R
|
||||||
|
# - External data: Field boundaries (pivot.geojson)
|
||||||
|
# - Data directories: merged_tif/, field_tiles/ (created if missing)
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# - Supports two-phase migration: legacy (merged_final_tif) and ongoing (merged_tif)
|
||||||
|
# - Automatically detects and handles field boundaries from pivot.geojson
|
||||||
|
# - Geometry validation and repair applied via st_make_valid()
|
||||||
|
# - Critical for downstream Scripts 20, 40, and KPI calculations
|
||||||
|
# - Creates per-field structure that enables efficient per-field processing
|
||||||
|
#
|
||||||
|
# RELATED ISSUES:
|
||||||
|
# SC-111: Script 10 refactoring and geometry repair
|
||||||
|
# SC-112: Utilities restructuring (uses 00_common_utils.R)
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Spatial data handling
|
||||||
|
suppressPackageStartupMessages({
|
||||||
|
|
||||||
|
library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries)
|
||||||
|
library(sf) # For spatial operations (reading field boundaries GeoJSON, masking)
|
||||||
|
library(here) # For relative path resolution
|
||||||
|
})
|
||||||
|
# ==============================================================================
|
||||||
|
# MAIN PROCESSING FUNCTION
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Orchestrate Script 10: parse CLI args, source project utilities, resolve the
# processing date window, then split each merged full-farm TIFF into per-field
# TIFFs via process_new_merged_tif().
#
# Side effects: may setwd() to the repo root, assigns project_dir / end_date /
# offset into .GlobalEnv (the sourced files execute there and read them), and
# calls quit(status = 0) on success — only safe to call from a top-level script.
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/)
  # This ensures all relative paths resolve correctly
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
  args <- commandArgs(trailingOnly = TRUE)

  # Parse arguments: [project] [end_date] [offset]
  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date()
  offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7

  # Make variables available to sourced files (they execute in global scope)
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("end_date", end_date_arg, envir = .GlobalEnv)
  assign("offset", offset_arg, envir = .GlobalEnv)

  # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
  # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })

  # Load Script 10-specific utilities (crop_tiff_to_fields, process_new_merged_tif)
  tryCatch({
    source("r_app/10_create_per_field_tiffs_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message))
    stop(e)
  })

  # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign())
  # These control which dates Script 10 processes from merged_tif/
  # Window: end_date - offset days to end_date
  # Always coerce to correct types to avoid issues with lingering/inherited values
  # NOTE(review): `end_date`/`offset` are not locals here — lookup falls through
  # to the globals assigned in STEP 2 (possibly mutated by the sourced files),
  # which is why exists()/type checks are re-run defensively.
  if (!exists("end_date") || !inherits(end_date, "Date")) {
    end_date <- Sys.Date()
    safe_log(paste("Using default end_date:", end_date), "INFO")
  }
  if (!exists("offset") || !is.numeric(offset)) {
    offset <- 7
    safe_log(paste("Using default offset:", offset, "days"), "INFO")
  }

  # Ensure offset is numeric (in case it came in as a character string from environment)
  if (is.character(offset)) {
    offset <- as.numeric(offset)
  }

  # Calculate date window for processing (inclusive on both ends)
  start_date <- end_date - offset
  date_window <- seq(start_date, end_date, by = "day")
  date_window_str <- format(date_window, "%Y-%m-%d")
  safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")

  # STEP 5: Load centralized path structure (creates all directories automatically)
  paths <- setup_project_directories(project_dir)

  safe_log(paste("Project:", project_dir))
  safe_log(paste("Base path:", paths$laravel_storage_dir))
  safe_log(paste("Data dir:", paths$data_dir))

  # Load field boundaries using data_dir (not field_boundaries_path)
  # load_field_boundaries() expects a directory and builds the file path internally
  fields_data <- load_field_boundaries(paths$data_dir)
  fields <- fields_data$field_boundaries_sf

  # Define input and output directories (from centralized paths)
  merged_tif_dir <- paths$merged_tif_folder
  field_tiles_dir <- paths$field_tiles_dir
  field_tiles_ci_dir <- paths$field_tiles_ci_dir

  # PHASE 1: Process new downloads (always runs)
  # Pass field_tiles_ci_dir so it can skip dates already migrated
  # Also pass end_date and offset so only dates in window are processed
  process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
                                           end_date = end_date, offset = offset)

  safe_log("\n========================================", "INFO")
  safe_log("FINAL SUMMARY", "INFO")
  safe_log("========================================", "INFO")
  safe_log(paste("Processing: created =", process_result$total_created,
                 ", skipped =", process_result$total_skipped,
                 ", errors =", process_result$total_errors), "INFO")
  safe_log("Script 10 complete", "INFO")
  safe_log("========================================\n", "INFO")

  quit(status = 0)
}
|
||||||
|
|
||||||
|
# Execute main if called from command line.
# sys.nframe() == 0 is the R analogue of Python's `if __name__ == "__main__"`:
# it is 0 only when the file is run via Rscript, not when source()d.
if (sys.nframe() == 0) {
  main()
}
|
||||||
306
r_app/10_create_per_field_tiffs_utils.R
Normal file
306
r_app/10_create_per_field_tiffs_utils.R
Normal file
|
|
@ -0,0 +1,306 @@
|
||||||
|
# ==============================================================================
|
||||||
|
# 10_CREATE_PER_FIELD_TIFFS_UTILS.R
|
||||||
|
# ==============================================================================
|
||||||
|
# UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Extracted helper functions specific to Script 10 (per-field TIFF splitting).
|
||||||
|
# These functions are domain-specific (not generic) and handle raster cropping
|
||||||
|
# and batch processing of satellite imagery for per-field storage.
|
||||||
|
#
|
||||||
|
# FUNCTIONS:
|
||||||
|
# 1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries
|
||||||
|
# 2. process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - terra (raster operations)
|
||||||
|
# - sf (geometry operations)
|
||||||
|
# - 00_common_utils.R (logging via safe_log())
|
||||||
|
#
|
||||||
|
# USAGE:
|
||||||
|
# source(here::here("r_app", "10_create_per_field_tiffs_utils.R"))
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
library(terra)
|
||||||
|
library(sf)
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
#' Crop Single TIFF to Field Boundaries
#'
#' Loads one satellite GeoTIFF, determines which field polygons overlap it, and
#' writes a per-field cropped copy to {output_base_dir}/{field_name}/{tif_date}.tif.
#' Field boundaries are reprojected to the raster CRS before intersection, and
#' existing output files are left untouched (idempotent re-runs).
#'
#' @param tif_path Character. Absolute path to the source TIFF (4- or 5-band).
#' @param tif_date Character. Date string (YYYY-MM-DD) used to name outputs.
#' @param fields sf object. Field boundaries; the `field` column supplies the
#'   per-field output directory name.
#' @param output_base_dir Character. Root directory for the per-field tree
#'   (e.g. "field_tiles" or "field_tiles_CI").
#'
#' @return List with integer-valued elements:
#'   - created: number of field TIFFs written
#'   - skipped: number of outputs that already existed
#'   - errors:  number of fields that failed to crop/write
#'
#' @details
#' Cropping uses terra::crop(), i.e. the raster is cut to each field's bounding
#' box (pixels outside the polygon but inside its bbox are retained).
crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {

  n_created <- 0
  n_skipped <- 0
  n_errors <- 0

  # Guard: source file must exist before we try to open it.
  if (!file.exists(tif_path)) {
    safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR")
    return(list(created = 0, skipped = 0, errors = 1))
  }

  # Open the raster; a load failure is logged and reported as one error.
  # (Named `src` rather than `rast` to avoid shadowing terra::rast.)
  src <- tryCatch(rast(tif_path), error = function(e) {
    safe_log(paste("ERROR loading raster:", e$message), "ERROR")
    NULL
  })
  if (is.null(src)) {
    return(list(created = 0, skipped = 0, errors = 1))
  }

  # Raster footprint as an sf geometry, carrying the raster's CRS.
  src_bbox <- st_as_sfc(st_bbox(src))
  st_crs(src_bbox) <- st_crs(src)

  # Bring the field polygons into the raster CRS before intersecting.
  flds_proj <- st_transform(fields, st_crs(src_bbox))

  # Which fields touch this raster? (raster bbox first, then fields)
  hit_idx <- unique(unlist(st_intersects(src_bbox, flds_proj, sparse = TRUE)))

  if (length(hit_idx) == 0) {
    safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO")
    return(list(created = 0, skipped = 0, errors = 0))
  }

  for (idx in hit_idx) {
    fname <- fields$field[idx]
    geom <- flds_proj[idx, ]

    # Ensure the per-field output directory exists.
    out_dir <- file.path(output_base_dir, fname)
    if (!dir.exists(out_dir)) {
      dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
    }

    out_file <- file.path(out_dir, paste0(tif_date, ".tif"))

    # Idempotency: never re-write an output that is already on disk.
    if (file.exists(out_file)) {
      n_skipped <- n_skipped + 1
      next
    }

    # Crop to the field's extent and persist; failures are counted, not fatal.
    tryCatch({
      writeRaster(crop(src, geom), out_file, overwrite = TRUE)
      n_created <- n_created + 1
    }, error = function(e) {
      safe_log(paste("ERROR cropping field", fname, ":", e$message), "ERROR")
      n_errors <<- n_errors + 1
    })
  }

  return(list(created = n_created, skipped = n_skipped, errors = n_errors))
}
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
#' Process New Merged TIFFs into Per-Field Structure
#'
#' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory.
#' Crops each TIFF to field boundaries and stores results in per-field
#' subdirectories: field_tiles/{FIELD}/{DATE}.tif.
#'
#' @param merged_tif_dir Character. Source directory containing raw merged TIFFs
#'   (files named YYYY-MM-DD.tif).
#' @param field_tiles_dir Character. Target directory for per-field TIFFs
#'   (created if missing).
#' @param fields sf object. Field boundaries passed through to crop_tiff_to_fields().
#' @param field_tiles_ci_dir Character or NULL. Directory of migrated
#'   CI-calculated TIFFs; when provided, dates already present there are
#'   skipped (they were processed by Script 20). Default: NULL.
#' @param end_date Date or NULL. End of the processing window. Default: NULL
#'   (process all available dates).
#' @param offset Numeric or NULL. Days to look back from end_date; only used
#'   when end_date is also provided. Default: NULL.
#'
#' @return List with elements:
#'   - total_created: total field TIFFs created across all dates
#'   - total_skipped: total skips (whole dates skipped by the two pre-checks
#'     count as 1 each; per-field existing files counted by crop_tiff_to_fields)
#'   - total_errors: total cropping errors across all dates
#'
#' @details
#' A date is considered "already done" when ANY per-field subdirectory of the
#' checked tree contains {DATE}.tif — both migration (field_tiles_CI/) and
#' re-run (field_tiles/) checks share this rule via a local helper.
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL,
                                   end_date = NULL, offset = NULL) {

  # Helper: TRUE when any per-field subdirectory of base_dir already holds a
  # TIFF for this date, i.e. {base_dir}/{FIELD}/{date_str}.tif exists.
  # (Replaces two previously duplicated scan loops.)
  date_in_any_field <- function(base_dir, date_str) {
    field_dirs <- list.dirs(base_dir, full.names = TRUE, recursive = FALSE)
    any(file.exists(file.path(field_dirs, paste0(date_str, ".tif"))))
  }

  # NOTE(review): this banner says "PHASE 2" while the caller in Script 10
  # labels the call "PHASE 1" — confirm which numbering is intended.
  safe_log("\n========================================", "INFO")
  safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
  safe_log("========================================", "INFO")

  # Nothing to do when the download directory is absent.
  if (!dir.exists(merged_tif_dir)) {
    safe_log("No merged_tif/ directory found - no new data to process", "WARNING")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }

  # Ensure the output root exists.
  if (!dir.exists(field_tiles_dir)) {
    dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE)
  }

  # Find all date-pattern TIFFs (YYYY-MM-DD.tif) in the root of merged_tif.
  tiff_files <- list.files(
    merged_tif_dir,
    pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$",
    full.names = TRUE
  )

  # Restrict to the [end_date - offset, end_date] window when both are given.
  if (!is.null(end_date) && !is.null(offset)) {
    start_date <- end_date - offset
    date_range <- seq(start_date, end_date, by = "day")
    date_range_str <- format(date_range, "%Y-%m-%d")

    tiff_dates <- gsub("\\.tif$", "", basename(tiff_files))
    tiff_files <- tiff_files[tiff_dates %in% date_range_str]

    safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO")
  }

  safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")

  if (length(tiff_files) == 0) {
    safe_log("No new TIFFs found - nothing to process", "WARNING")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }

  total_created <- 0
  total_skipped <- 0
  total_errors <- 0

  for (tif_path in tiff_files) {
    tif_date <- gsub("\\.tif$", "", basename(tif_path))

    # CHECK 1: Skip if this date was already migrated to field_tiles_CI/
    # (means Script 20 already processed it and extracted RDS).
    if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir) &&
        date_in_any_field(field_tiles_ci_dir, tif_date)) {
      safe_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)"), "INFO")
      total_skipped <- total_skipped + 1
      next
    }

    # CHECK 2: Skip if this date already exists in field_tiles/
    # (means this date has already been processed through Script 10).
    if (dir.exists(field_tiles_dir) && date_in_any_field(field_tiles_dir, tif_date)) {
      safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO")
      total_skipped <- total_skipped + 1
      next
    }

    safe_log(paste("Processing:", tif_date), "INFO")

    result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
    total_created <- total_created + result$created
    total_skipped <- total_skipped + result$skipped
    total_errors <- total_errors + result$errors
  }

  safe_log(paste("Processing complete: created =", total_created,
                 ", skipped =", total_skipped, ", errors =", total_errors), "INFO")

  return(list(total_created = total_created, total_skipped = total_skipped,
              total_errors = total_errors))
}
|
||||||
|
|
@ -1,202 +0,0 @@
|
||||||
# CI_EXTRACTION.R
|
|
||||||
# ==============
|
|
||||||
# This script processes satellite imagery to extract Canopy Index (CI) values for agricultural fields.
|
|
||||||
# It handles image processing, masking, and extraction of statistics by field/sub-field.
|
|
||||||
# Supports both 4-band and 8-band PlanetScope data with automatic band detection and cloud masking.
|
|
||||||
#
|
|
||||||
# Usage: Rscript 20_ci_extraction.R [end_date] [offset] [project_dir] [data_source]
|
|
||||||
# - end_date: End date for processing (YYYY-MM-DD format)
|
|
||||||
# - offset: Number of days to look back from end_date
|
|
||||||
# - project_dir: Project directory name (e.g., "angata", "aura", "chemba")
|
|
||||||
# - data_source: Data source directory - "merged_tif_8b" (default) or "merged_tif" (4-band) or "merged_final_tif"
|
|
||||||
# If tiles exist (daily_tiles_split/), they are used automatically
|
|
||||||
#
|
|
||||||
# Examples:
|
|
||||||
# # Angata 8-band data (with UDM cloud masking)
|
|
||||||
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
|
|
||||||
#
|
|
||||||
# # Aura 4-band data
|
|
||||||
# Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif
|
|
||||||
#
|
|
||||||
# # Auto-detects and uses tiles if available:
|
|
||||||
# Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
|
|
||||||
|
|
||||||
# 1. Load required packages
|
|
||||||
# -----------------------
|
|
||||||
suppressPackageStartupMessages({
|
|
||||||
library(sf)
|
|
||||||
library(terra)
|
|
||||||
library(tidyverse)
|
|
||||||
library(lubridate)
|
|
||||||
library(readxl)
|
|
||||||
library(here)
|
|
||||||
library(furrr)
|
|
||||||
library(future)
|
|
||||||
})
|
|
||||||
|
|
||||||
# 2. Process command line arguments
|
|
||||||
# ------------------------------
|
|
||||||
# Entry point for legacy CI extraction (Script 20).
# Parses CLI args ([end_date] [offset] [project_dir] [data_source]), sources the
# project configuration and utilities into .GlobalEnv, then extracts CI either
# from pre-split tiles (daily_tiles_split/) or from full-extent mosaics.
#
# BUG FIX: the date window previously hardcoded 7 days (date_list(end_date, 7)),
# silently discarding the validated `offset` argument; it is now honored.
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # --- end_date: defaults to today on absence or parse failure --------------
  if (length(args) >= 1 && !is.na(args[1])) {
    end_date <- as.Date(args[1])
    if (is.na(end_date)) {
      warning("Invalid end_date provided. Using default (current date).")
      end_date <- Sys.Date()
    }
  } else {
    end_date <- Sys.Date()
  }

  # --- offset: look-back window in days (must be a positive number) ---------
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      warning("Invalid offset provided. Using default (7 days).")
      offset <- 7
    }
  } else {
    offset <- 7
  }

  # --- project_dir: CLI arg, then global, then default ----------------------
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "angata"  # default project
  }

  # --- data_source: merged_tif_8b (default) / merged_tif / merged_final_tif -
  if (length(args) >= 4 && !is.na(args[4])) {
    data_source <- as.character(args[4])
    # Validate data_source is a recognized option (non-fatal)
    if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) {
      warning(paste("Data source", data_source, "not in standard list. Using as-is."))
    }
  } else if (exists("data_source", envir = .GlobalEnv)) {
    data_source <- get("data_source", envir = .GlobalEnv)
  } else {
    data_source <- "merged_tif_8b"  # Default to 8-band (newer data with cloud masking)
  }

  # Make project_dir and data_source available globally (sourced files read them)
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("data_source", data_source, envir = .GlobalEnv)

  cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n",
              project_dir, format(end_date, "%Y-%m-%d"), offset, data_source))

  # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
  ci_extraction_script <- TRUE
  assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)

  # Initialize project configuration
  new_project_question <- TRUE

  cat("[DEBUG] Attempting to source r_app/parameters_project.R\n")
  tryCatch({
    source("r_app/parameters_project.R")
    cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n")
    stop(e)
  })

  cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n")
  tryCatch({
    source("r_app/20_ci_extraction_utils.R")
    cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n")
    stop(e)
  })

  # Generate date list for processing.
  # FIX: use the parsed `offset` instead of a hardcoded 7-day window.
  dates <- date_list(end_date, offset)
  log_message(paste("Processing data for week", dates$week, "of", dates$year))

  # Find and filter raster files by date - with grid size detection
  log_message("Searching for raster files")

  # Check if tiles exist (Script 01 output) - detect grid size dynamically
  tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")

  # Detect grid size from daily_tiles_split folder structure.
  # Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc.
  grid_size <- NA
  if (dir.exists(tiles_split_base)) {
    subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
    # Look for grid size patterns like "5x5", "10x10", "20x20"
    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
    if (length(grid_patterns) > 0) {
      grid_size <- grid_patterns[1]  # Use first grid size found
      log_message(paste("Detected grid size:", grid_size))
    }
  }

  # Construct tile folder path with grid size
  if (!is.na(grid_size)) {
    tile_folder <- file.path(tiles_split_base, grid_size)
  } else {
    tile_folder <- tiles_split_base
  }

  use_tiles <- dir.exists(tile_folder)

  # Make grid_size available globally for other functions
  assign("grid_size", grid_size, envir = .GlobalEnv)

  # NOTE(review): field_boundaries, field_boundaries_sf, merged_final,
  # daily_vrt, planet_tif_folder and the *_dir variables below are globals
  # created by the sourced configuration scripts above.
  tryCatch({
    if (use_tiles) {
      # Use tile-based processing
      log_message(paste("Tile folder detected at", tile_folder))
      log_message("Using tile-based CI extraction")

      process_ci_values_from_tiles(
        dates = dates,
        tile_folder = tile_folder,
        field_boundaries = field_boundaries,
        field_boundaries_sf = field_boundaries_sf,
        daily_CI_vals_dir = daily_CI_vals_dir,
        cumulative_CI_vals_dir = cumulative_CI_vals_dir,
        merged_final_dir = merged_final,
        grid_size = grid_size
      )

    } else {
      # Use legacy full-extent processing
      log_message("No tiles found. Using legacy full-extent approach")

      # Use the existing utility function to find satellite images
      existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
      log_message(paste("Found", length(existing_files), "raster files for processing"))

      # Process raster files and create VRT
      vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)

      # Process and combine CI values
      process_ci_values(dates, field_boundaries, merged_final,
                        field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
    }

  }, error = function(e) {
    log_message(paste("Error in main processing:", e$message), level = "ERROR")
    stop(e$message)
  })
}
|
|
||||||
|
|
||||||
# Run only when executed via Rscript (sys.nframe() == 0), not when source()d —
# the R analogue of Python's `if __name__ == "__main__"` guard.
if (sys.nframe() == 0) {
  main()
}
|
|
||||||
238
r_app/20_ci_extraction_per_field.R
Normal file
238
r_app/20_ci_extraction_per_field.R
Normal file
|
|
@ -0,0 +1,238 @@
|
||||||
|
# CI_EXTRACTION_PER_FIELD.R
|
||||||
|
# =========================
|
||||||
|
# Script 20 (Refactored for Per-Field Architecture)
|
||||||
|
#
|
||||||
|
# This script reads per-field TIFFs from Script 10 output and:
|
||||||
|
# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
|
||||||
|
# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif
|
||||||
|
# 3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
|
||||||
|
#
|
||||||
|
# Key differences from legacy Script 20:
|
||||||
|
# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
|
||||||
|
# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
|
||||||
|
# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
|
||||||
|
# - Directly extracts CI statistics per sub_field within each field
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
|
||||||
|
# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7
|
||||||
|
|
||||||
|
suppressPackageStartupMessages({
|
||||||
|
library(sf)
|
||||||
|
library(terra)
|
||||||
|
library(tidyverse)
|
||||||
|
library(lubridate)
|
||||||
|
library(here)
|
||||||
|
})
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Main Processing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
main <- function() {
|
||||||
|
# STEP 1: Set working directory to project root (smartcane/)
|
||||||
|
# This ensures all relative paths resolve correctly
|
||||||
|
if (basename(getwd()) == "r_app") {
|
||||||
|
setwd("..")
|
||||||
|
}
|
||||||
|
|
||||||
|
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
|
||||||
|
# Parse command-line arguments FIRST
|
||||||
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
|
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||||
|
end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
|
||||||
|
offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
|
||||||
|
|
||||||
|
# Make project_dir available globally for parameters_project.R
|
||||||
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
|
|
||||||
|
# Load parameters_project.R (provides safe_log, date_list, setup_project_directories, etc.)
|
||||||
|
tryCatch({
|
||||||
|
source("r_app/parameters_project.R")
|
||||||
|
}, error = function(e) {
|
||||||
|
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
|
||||||
|
stop(e)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Load CI extraction utilities
|
||||||
|
tryCatch({
|
||||||
|
source("r_app/20_ci_extraction_utils.R")
|
||||||
|
}, error = function(e) {
|
||||||
|
cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
|
||||||
|
stop(e)
|
||||||
|
})
|
||||||
|
|
||||||
|
# STEP 3: Now all utilities are loaded, proceed with script logic
|
||||||
|
safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
|
||||||
|
safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
|
||||||
|
project_dir, format(end_date, "%Y-%m-%d"), offset))
|
||||||
|
|
||||||
|
# Set up directory paths from parameters
|
||||||
|
setup <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
|
# Load field boundaries directly from field_boundaries_path in setup
|
||||||
|
tryCatch({
|
||||||
|
field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
|
||||||
|
safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR")
|
||||||
|
stop(e)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Get list of dates to process
|
||||||
|
# If in migration mode, dates_to_process is provided by the pipeline runner
|
||||||
|
if (exists("dates_to_process") && !is.null(dates_to_process)) {
|
||||||
|
# Migration mode: Use provided list of dates (process ALL available dates)
|
||||||
|
dates_filter <- sort(dates_to_process)
|
||||||
|
safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
|
||||||
|
} else {
|
||||||
|
# Normal mode: Use 7-day offset window
|
||||||
|
dates <- date_list(end_date, offset)
|
||||||
|
dates_filter <- dates$days_filter
|
||||||
|
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
|
||||||
|
dates$start_date, dates$end_date, length(dates_filter)))
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
|
||||||
|
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
|
||||||
|
safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))
|
||||||
|
|
||||||
|
# Process each field
|
||||||
|
if (!dir.exists(setup$field_tiles_dir)) {
|
||||||
|
safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
|
||||||
|
stop("Script 10 output not found. Run Script 10 first.")
|
||||||
|
}
|
||||||
|
|
||||||
|
fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
|
||||||
|
fields <- fields[fields != ""] # Remove empty strings
|
||||||
|
|
||||||
|
if (length(fields) == 0) {
|
||||||
|
safe_log("No fields found in field_tiles directory", "WARNING")
|
||||||
|
return()
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("Found %d fields to process", length(fields)))
|
||||||
|
|
||||||
|
# DEBUG: Check what paths are available in setup
|
||||||
|
safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", ")))
|
||||||
|
safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
|
||||||
|
safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))
|
||||||
|
|
||||||
|
# Use daily_ci_vals_dir for per-field daily CI output
|
||||||
|
# Pre-create output subdirectories for all fields
|
||||||
|
for (field in fields) {
|
||||||
|
dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
|
||||||
|
if (!is.null(setup$daily_ci_vals_dir)) {
|
||||||
|
dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Process each DATE (load merged TIFF once, extract all fields from it)
|
||||||
|
total_success <- 0
|
||||||
|
total_error <- 0
|
||||||
|
|
||||||
|
for (date_str in dates_filter) {
|
||||||
|
# Load the MERGED TIFF (farm-wide) ONCE for this date
|
||||||
|
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
|
||||||
|
|
||||||
|
if (!file.exists(input_tif_merged)) {
|
||||||
|
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
|
||||||
|
total_error <<- total_error + 1
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Load 4-band TIFF ONCE
|
||||||
|
raster_4band <- terra::rast(input_tif_merged)
|
||||||
|
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
|
||||||
|
|
||||||
|
# Calculate CI from 4-band
|
||||||
|
ci_raster <- calc_ci_from_raster(raster_4band)
|
||||||
|
|
||||||
|
# Create 5-band (R, G, B, NIR, CI)
|
||||||
|
five_band <- c(raster_4band, ci_raster)
|
||||||
|
|
||||||
|
# Now process all fields from this single merged TIFF
|
||||||
|
fields_processed_this_date <- 0
|
||||||
|
|
||||||
|
for (field in fields) {
|
||||||
|
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
|
||||||
|
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
|
||||||
|
|
||||||
|
# Pre-create output directories
|
||||||
|
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
|
||||||
|
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
|
||||||
|
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
||||||
|
|
||||||
|
# MODE 3: Skip if both outputs already exist
|
||||||
|
if (file.exists(output_tif) && file.exists(output_rds)) {
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
|
||||||
|
if (file.exists(output_tif) && !file.exists(output_rds)) {
|
||||||
|
tryCatch({
|
||||||
|
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
|
||||||
|
fields_processed_this_date <- fields_processed_this_date + 1
|
||||||
|
}, error = function(e) {
|
||||||
|
# Continue to next field
|
||||||
|
})
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
|
||||||
|
tryCatch({
|
||||||
|
# Crop 5-band TIFF to field boundary
|
||||||
|
field_geom <- field_boundaries_sf %>% filter(field == !!field)
|
||||||
|
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
|
||||||
|
|
||||||
|
# Save 5-band field TIFF
|
||||||
|
terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
|
||||||
|
|
||||||
|
# Extract CI statistics by sub_field (from cropped CI raster)
|
||||||
|
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
|
||||||
|
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
|
||||||
|
|
||||||
|
# Save RDS
|
||||||
|
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
||||||
|
saveRDS(ci_stats, output_rds)
|
||||||
|
}
|
||||||
|
|
||||||
|
fields_processed_this_date <- fields_processed_this_date + 1
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
# Error in individual field, continue to next
|
||||||
|
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
# Increment success counter if at least one field succeeded
|
||||||
|
if (fields_processed_this_date > 0) {
|
||||||
|
total_success <<- total_success + 1
|
||||||
|
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
|
||||||
|
}
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
total_error <<- total_error + 1
|
||||||
|
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
safe_log(sprintf("\n=== Processing Complete ==="))
|
||||||
|
safe_log(sprintf("Successfully processed: %d", total_success))
|
||||||
|
safe_log(sprintf("Errors encountered: %d", total_error))
|
||||||
|
|
||||||
|
if (total_success > 0) {
|
||||||
|
safe_log("Output files created in:")
|
||||||
|
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
||||||
|
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Execute main if called from command line
|
||||||
|
if (sys.nframe() == 0) {
|
||||||
|
main()
|
||||||
|
}
|
||||||
|
|
@ -6,24 +6,10 @@
|
||||||
#
|
#
|
||||||
# Parallel Processing: Tile-based extraction uses furrr::future_map to process
|
# Parallel Processing: Tile-based extraction uses furrr::future_map to process
|
||||||
# multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores)
|
# multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores)
|
||||||
|
#
|
||||||
#' Safe logging function that works whether log_message exists or not
|
# Per-Field Functions (Script 20):
|
||||||
#'
|
# - calc_ci_from_raster(): Calculate CI from 4-band raster (Chlorophyll Index formula: NIR/Green - 1)
|
||||||
#' @param message The message to log
|
# - extract_ci_by_subfield(): Extract per-sub_field CI statistics from raster
|
||||||
#' @param level The log level (default: "INFO")
|
|
||||||
#' @return NULL (used for side effects)
|
|
||||||
#'
|
|
||||||
safe_log <- function(message, level = "INFO") {
|
|
||||||
if (exists("log_message")) {
|
|
||||||
log_message(message, level)
|
|
||||||
} else {
|
|
||||||
if (level %in% c("ERROR", "WARNING")) {
|
|
||||||
warning(message)
|
|
||||||
} else {
|
|
||||||
message(message)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Generate a sequence of dates for processing
|
#' Generate a sequence of dates for processing
|
||||||
#'
|
#'
|
||||||
|
|
@ -216,9 +202,10 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
|
||||||
names(blue_band) <- "Blue"
|
names(blue_band) <- "Blue"
|
||||||
names(nir_band) <- "NIR"
|
names(nir_band) <- "NIR"
|
||||||
|
|
||||||
# Calculate Canopy Index from Red, Green, NIR
|
# Calculate Canopy Index from Green and NIR
|
||||||
# CI = (NIR - Red) / (NIR + Red) is a common formulation
|
# *** CRITICAL: Use CHLOROPHYLL INDEX formula ONLY ***
|
||||||
# But using NIR/Green - 1 is also valid and more sensitive to green vegetation
|
# CORRECT: CI = NIR / Green - 1 (ranges ~1-7, sensitive to active chlorophyll)
|
||||||
|
# WRONG: Do NOT use (NIR-Red)/(NIR+Red) - that is NDVI, ranges -1 to 1, different scale
|
||||||
CI <- nir_band / green_band - 1
|
CI <- nir_band / green_band - 1
|
||||||
names(CI) <- "CI"
|
names(CI) <- "CI"
|
||||||
|
|
||||||
|
|
@ -294,7 +281,6 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
|
||||||
|
|
||||||
# Write output files
|
# Write output files
|
||||||
terra::writeRaster(output_raster, new_file, overwrite = TRUE)
|
terra::writeRaster(output_raster, new_file, overwrite = TRUE)
|
||||||
terra::vrt(new_file, vrt_file, overwrite = TRUE)
|
|
||||||
|
|
||||||
# Check if the result has enough valid pixels
|
# Check if the result has enough valid pixels
|
||||||
valid_pixels <- terra::global(output_raster$CI, "notNA", na.rm=TRUE)
|
valid_pixels <- terra::global(output_raster$CI, "notNA", na.rm=TRUE)
|
||||||
|
|
@ -738,7 +724,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
|
||||||
date = date,
|
date = date,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
daily_CI_vals_dir = daily_CI_vals_dir,
|
daily_CI_vals_dir = daily_CI_vals_dir,
|
||||||
merged_final_tif_dir = merged_final_dir,
|
|
||||||
grid_size = grid_size
|
grid_size = grid_size
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -809,7 +794,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
|
||||||
date = date,
|
date = date,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
daily_CI_vals_dir = daily_CI_vals_dir,
|
daily_CI_vals_dir = daily_CI_vals_dir,
|
||||||
merged_final_tif_dir = merged_final_dir,
|
|
||||||
grid_size = grid_size
|
grid_size = grid_size
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -850,7 +834,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
|
||||||
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
|
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
|
||||||
#' @return Data frame with field CI statistics for this tile, or NULL if processing failed
|
#' @return Data frame with field CI statistics for this tile, or NULL if processing failed
|
||||||
#'
|
#'
|
||||||
process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) {
|
process_single_tile <- function(tile_file, field_boundaries_sf, date, grid_size = NA) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
tile_filename <- basename(tile_file)
|
tile_filename <- basename(tile_file)
|
||||||
safe_log(paste(" [TILE] Loading:", tile_filename))
|
safe_log(paste(" [TILE] Loading:", tile_filename))
|
||||||
|
|
@ -892,25 +876,10 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
|
||||||
output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band)
|
output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band)
|
||||||
names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI")
|
names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||||
|
|
||||||
# Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename
|
# NOTE: Do NOT save processed tile - it's an intermediate only
|
||||||
# This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/
|
# The purpose is to calculate field-level CI statistics, not to create permanent tile files
|
||||||
if (!is.na(grid_size)) {
|
# This prevents bloat in merged_final_tif/ directory (would unnecessarily duplicate
|
||||||
date_dir <- file.path(merged_final_tif_dir, grid_size, date)
|
# daily_tiles_split data with an extra CI band added)
|
||||||
} else {
|
|
||||||
date_dir <- file.path(merged_final_tif_dir, date)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!dir.exists(date_dir)) {
|
|
||||||
dir.create(date_dir, recursive = TRUE, showWarnings = FALSE)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Use same filename as source tile (e.g., 2026-01-02_01.tif)
|
|
||||||
tile_filename <- basename(tile_file)
|
|
||||||
output_file <- file.path(date_dir, tile_filename)
|
|
||||||
|
|
||||||
# Write processed tile
|
|
||||||
terra::writeRaster(output_raster, output_file, overwrite = TRUE)
|
|
||||||
safe_log(paste(" [SAVED TIFF] Output:", file.path(date, basename(output_file)), "(5 bands: Red, Green, Blue, NIR, CI)"))
|
|
||||||
|
|
||||||
# Extract statistics per field from CI band
|
# Extract statistics per field from CI band
|
||||||
field_bbox <- sf::st_bbox(field_boundaries_sf)
|
field_bbox <- sf::st_bbox(field_boundaries_sf)
|
||||||
|
|
@ -951,7 +920,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
|
||||||
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
|
#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure
|
||||||
#' @return Data frame with field CI statistics for the date
|
#' @return Data frame with field CI statistics for the date
|
||||||
#'
|
#'
|
||||||
extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) {
|
extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, grid_size = NA) {
|
||||||
|
|
||||||
if (!inherits(field_boundaries_sf, "sf")) {
|
if (!inherits(field_boundaries_sf, "sf")) {
|
||||||
field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
|
field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
|
||||||
|
|
@ -970,7 +939,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
||||||
stats_list <- tryCatch({
|
stats_list <- tryCatch({
|
||||||
furrr::future_map(
|
furrr::future_map(
|
||||||
tile_files,
|
tile_files,
|
||||||
~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size),
|
~ process_single_tile(.x, field_boundaries_sf, date, grid_size = grid_size),
|
||||||
.progress = FALSE,
|
.progress = FALSE,
|
||||||
.options = furrr::furrr_options(seed = TRUE)
|
.options = furrr::furrr_options(seed = TRUE)
|
||||||
)
|
)
|
||||||
|
|
@ -980,7 +949,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
||||||
lapply(
|
lapply(
|
||||||
tile_files,
|
tile_files,
|
||||||
function(tile_file) {
|
function(tile_file) {
|
||||||
process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size)
|
process_single_tile(tile_file, field_boundaries_sf, date, grid_size = grid_size)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
@ -1013,3 +982,208 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
||||||
|
|
||||||
return(aggregated)
|
return(aggregated)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Script 20 (Per-Field) Specific Functions
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
#' Calculate Canopy Index (CI) from 4-band raster
|
||||||
|
#'
|
||||||
|
#' *** CRITICAL FORMULA: CI = NIR / Green - 1 ***
|
||||||
|
#' This is the CHLOROPHYLL INDEX formula (ranges ~1-7 for vegetation).
|
||||||
|
#' NOT NDVI! Do NOT use (NIR-Red)/(NIR+Red) - that produces -1 to 1 range.
|
||||||
|
#'
|
||||||
|
#' Expects band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4)
|
||||||
|
#'
|
||||||
|
#' @param raster_obj Loaded raster object with at least 4 bands
|
||||||
|
#' @return Raster object containing CI values (Chlorophyll Index, ranges ~1-7)
|
||||||
|
#'
|
||||||
|
calc_ci_from_raster <- function(raster_obj) {
|
||||||
|
# Expected band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4)
|
||||||
|
if (terra::nlyr(raster_obj) < 4) {
|
||||||
|
stop("Raster has fewer than 4 bands. Cannot calculate CI.")
|
||||||
|
}
|
||||||
|
|
||||||
|
green <- terra::subset(raster_obj, 2) # Green band (required for proper CI calculation)
|
||||||
|
nir <- terra::subset(raster_obj, 4) # NIR
|
||||||
|
|
||||||
|
# *** CHLOROPHYLL INDEX = NIR / Green - 1 ***
|
||||||
|
# This formula is sensitive to active chlorophyll content and ranges ~1-7
|
||||||
|
# DO NOT use (NIR-Red)/(NIR+Red) - that is NDVI (Normalized Difference Vegetation Index)
|
||||||
|
# NDVI ranges -1 to 1 and is different from Chlorophyll Index
|
||||||
|
ci <- nir / green - 1
|
||||||
|
|
||||||
|
return(ci)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Extract CI statistics by sub_field from a CI raster
|
||||||
|
#'
|
||||||
|
#' For a given field, masks the CI raster to each sub_field polygon
|
||||||
|
#' and calculates statistics (mean, median, sd, min, max, count).
|
||||||
|
#'
|
||||||
|
#' @param ci_raster Raster object containing CI values
|
||||||
|
#' @param field_boundaries_sf SF object containing field/sub_field polygons
|
||||||
|
#' @param field_name Character string of the field name to process
|
||||||
|
#' @return Data frame with field, sub_field, and CI statistics; NULL if field not found
|
||||||
|
#'
|
||||||
|
extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) {
|
||||||
|
# NOTE: Per-field TIFFs are already cropped to field boundaries by Script 10
|
||||||
|
# No need to mask again - just extract all valid pixels from the raster
|
||||||
|
|
||||||
|
# Extract ALL CI values (no masking needed for pre-cropped per-field TIFFs)
|
||||||
|
ci_values <- terra::values(ci_raster, na.rm = TRUE)
|
||||||
|
|
||||||
|
if (length(ci_values) > 0) {
|
||||||
|
result_row <- data.frame(
|
||||||
|
field = field_name,
|
||||||
|
sub_field = field_name, # Use field_name as sub_field since TIFF is already field-specific
|
||||||
|
ci_mean = mean(ci_values, na.rm = TRUE),
|
||||||
|
ci_median = median(ci_values, na.rm = TRUE),
|
||||||
|
ci_sd = sd(ci_values, na.rm = TRUE),
|
||||||
|
ci_min = min(ci_values, na.rm = TRUE),
|
||||||
|
ci_max = max(ci_values, na.rm = TRUE),
|
||||||
|
ci_count = length(ci_values),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
result_row <- data.frame(
|
||||||
|
field = field_name,
|
||||||
|
sub_field = field_name,
|
||||||
|
ci_mean = NA_real_,
|
||||||
|
ci_median = NA_real_,
|
||||||
|
ci_sd = NA_real_,
|
||||||
|
ci_min = NA_real_,
|
||||||
|
ci_max = NA_real_,
|
||||||
|
ci_count = 0,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return(result_row)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Extract RDS from existing CI TIFF (Migration/Regeneration Mode)
|
||||||
|
#'
|
||||||
|
#' This function extracts CI statistics from an already-calculated CI TIFF
|
||||||
|
#' without needing to recalculate from raw 4-band imagery.
|
||||||
|
#' Used during migration when field_tiles_CI/ exists but daily_vals/{FIELD}/ is missing.
|
||||||
|
#'
|
||||||
|
#' @param ci_tiff_path Path to the 5-band TIFF containing CI as the 5th band
|
||||||
|
#' @param output_rds_path Path where to save the output RDS file
|
||||||
|
#' @param field_boundaries_sf SF object with field/sub_field polygons
|
||||||
|
#' @param field_name Name of the field to extract
|
||||||
|
#' @return The RDS data frame (invisibly) and saves to disk
|
||||||
|
#'
|
||||||
|
extract_rds_from_ci_tiff <- function(ci_tiff_path, output_rds_path, field_boundaries_sf, field_name) {
|
||||||
|
tryCatch({
|
||||||
|
# Load the 5-band TIFF
|
||||||
|
raster_5band <- terra::rast(ci_tiff_path)
|
||||||
|
|
||||||
|
# Extract CI (5th band)
|
||||||
|
# Assuming structure: [1]=R, [2]=G, [3]=B, [4]=NIR, [5]=CI
|
||||||
|
ci_raster <- raster_5band[[5]]
|
||||||
|
|
||||||
|
# Extract CI statistics by sub_field
|
||||||
|
ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field_name)
|
||||||
|
|
||||||
|
# Save RDS
|
||||||
|
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
||||||
|
saveRDS(ci_stats, output_rds_path)
|
||||||
|
return(invisible(ci_stats))
|
||||||
|
} else {
|
||||||
|
safe_log(sprintf("No CI statistics extracted from %s", ci_tiff_path), "WARNING")
|
||||||
|
return(invisible(NULL))
|
||||||
|
}
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(sprintf("Error extracting RDS from CI TIFF: %s", e$message), "ERROR")
|
||||||
|
return(invisible(NULL))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Regenerate ALL missing RDS files from existing CI TIFFs (Comprehensive Migration Mode)
|
||||||
|
#'
|
||||||
|
#' This function processes ALL dates in field_tiles_CI/ and extracts RDS for any missing daily_vals/
|
||||||
|
#' files. No date window filtering - processes the entire historical archive.
|
||||||
|
#'
|
||||||
|
#' Used for one-time migration of old projects where field_tiles_CI/ is populated but daily_vals/
|
||||||
|
#' RDS files are missing or incomplete.
|
||||||
|
#'
|
||||||
|
#' @param field_tiles_ci_dir Path to field_tiles_CI/ (input: pre-calculated CI TIFFs)
|
||||||
|
#' @param daily_vals_dir Path to daily_vals/ (output: RDS statistics files)
|
||||||
|
#' @param field_boundaries_sf SF object with field/sub_field polygons
|
||||||
|
#' @param fields Vector of field names to process
|
||||||
|
#'
|
||||||
|
#' @return List with summary stats: list(total_processed=N, total_skipped=M, total_errors=K)
|
||||||
|
#'
|
||||||
|
regenerate_all_missing_rds <- function(field_tiles_ci_dir, daily_vals_dir, field_boundaries_sf, fields) {
|
||||||
|
safe_log("\n========================================")
|
||||||
|
safe_log("MIGRATION MODE: REGENERATING ALL MISSING RDS")
|
||||||
|
safe_log("Processing ALL dates in field_tiles_CI/")
|
||||||
|
safe_log("========================================")
|
||||||
|
|
||||||
|
total_processed <- 0
|
||||||
|
total_skipped <- 0
|
||||||
|
total_errors <- 0
|
||||||
|
|
||||||
|
# Iterate through each field
|
||||||
|
for (field in fields) {
|
||||||
|
field_ci_path <- file.path(field_tiles_ci_dir, field)
|
||||||
|
field_daily_vals_path <- file.path(daily_vals_dir, field)
|
||||||
|
|
||||||
|
# Skip if field directory doesn't exist
|
||||||
|
if (!dir.exists(field_ci_path)) {
|
||||||
|
safe_log(sprintf(" Field %s: field_tiles_CI not found (skipping)", field))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create output directory for RDS
|
||||||
|
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
|
||||||
|
# Find ALL CI TIFFs for this field (no date filtering)
|
||||||
|
ci_tiff_files <- list.files(
|
||||||
|
path = field_ci_path,
|
||||||
|
pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$",
|
||||||
|
full.names = TRUE
|
||||||
|
)
|
||||||
|
|
||||||
|
if (length(ci_tiff_files) == 0) {
|
||||||
|
safe_log(sprintf(" Field %s: No CI TIFFs found (skipping)", field))
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf(" Field %s: Found %d CI TIFFs to process", field, length(ci_tiff_files)))
|
||||||
|
|
||||||
|
# Process each CI TIFF
|
||||||
|
for (ci_tiff in ci_tiff_files) {
|
||||||
|
date_str <- tools::file_path_sans_ext(basename(ci_tiff))
|
||||||
|
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
||||||
|
|
||||||
|
# Skip if RDS already exists
|
||||||
|
if (file.exists(output_rds)) {
|
||||||
|
total_skipped <- total_skipped + 1
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract RDS from CI TIFF
|
||||||
|
tryCatch({
|
||||||
|
extract_rds_from_ci_tiff(ci_tiff, output_rds, field_boundaries_sf, field)
|
||||||
|
safe_log(sprintf(" %s: ✓ RDS extracted", date_str))
|
||||||
|
total_processed <- total_processed + 1
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(sprintf(" %s: ✗ Error - %s", date_str, e$message), "ERROR")
|
||||||
|
total_errors <<- total_errors + 1
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("\nMigration complete: processed %d, skipped %d, errors %d",
|
||||||
|
total_processed, total_skipped, total_errors))
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
total_processed = total_processed,
|
||||||
|
total_skipped = total_skipped,
|
||||||
|
total_errors = total_errors
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,62 @@
|
||||||
# 02b_CONVERT_CI_RDS_TO_CSV.R
|
# ============================================================================
|
||||||
# ============================
|
# SCRIPT 21: Convert CI RDS to CSV (Python Compatibility Format)
|
||||||
# Convert combined_CI_data.rds (output of script 02) to CSV format for Python
|
# ============================================================================
|
||||||
# This script runs AFTER script 02 (CI extraction) and creates a CSV that Python
|
# PURPOSE:
|
||||||
# can use for harvest date detection WITHOUT requiring the 'model' column (which
|
# Convert consolidated CI data from R's wide format (RDS) to Python-compatible
|
||||||
# comes from script 03 after interpolation and harvest dates are known).
|
# long format (CSV). Prepares per-field CI time series for harvest detection
|
||||||
|
# models and Python ML workflows without requiring interpolated/modeled values.
|
||||||
#
|
#
|
||||||
# Usage: Rscript 02b_convert_ci_rds_to_csv.R [project_dir]
|
# INPUT DATA:
|
||||||
# - project_dir: Project directory name (e.g., "esa", "chemba", "angata")
|
# - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# - Format: RDS (interpolated growth model data from Script 30)
|
||||||
|
# - Requirement: Script 30 must have completed growth model interpolation
|
||||||
#
|
#
|
||||||
# Output: CSV file at laravel_app/storage/app/{project_dir}/Data/extracted_ci/cumulative_vals/ci_data_for_python.csv
|
# OUTPUT DATA:
|
||||||
# Columns: field, sub_field, Date, FitData, DOY, value (alias for FitData)
|
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||||
|
# - Format: CSV (long format)
|
||||||
|
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||||
#
|
#
|
||||||
|
# USAGE:
|
||||||
|
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
||||||
|
#
|
||||||
|
# Example (Windows PowerShell):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# CLIENT TYPES:
|
||||||
|
# - cane_supply (ANGATA): Yes - data export
|
||||||
|
# - agronomic_support (AURA): Yes - Python ML integration
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - Packages: tidyverse, lubridate, zoo
|
||||||
|
# - Utils files: parameters_project.R
|
||||||
|
# - Input data: combined_CI_data.rds from Script 20
|
||||||
|
# - Data directories: extracted_ci/cumulative_vals/
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||||
|
# - Handles both wide format and long format inputs from growth model
|
||||||
|
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
||||||
|
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||||
|
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||||
|
# - Exports complete growth curves with interpolated values for ML training
|
||||||
|
#
|
||||||
|
# RELATED ISSUES:
|
||||||
|
# SC-112: Utilities restructuring
|
||||||
|
# SC-108: Core pipeline improvements
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
suppressPackageStartupMessages({
|
suppressPackageStartupMessages({
|
||||||
library(tidyverse)
|
# File path handling
|
||||||
library(lubridate)
|
library(here) # For relative path resolution (platform-independent file paths)
|
||||||
library(zoo)
|
|
||||||
library(here)
|
# Data manipulation
|
||||||
|
library(tidyverse) # For dplyr, readr (data wrangling and CSV I/O)
|
||||||
|
library(lubridate) # For date/time operations (DOY calculation)
|
||||||
|
library(zoo) # For zoo objects (gap filling, rolling operations)
|
||||||
})
|
})
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -140,7 +180,7 @@ main <- function() {
|
||||||
|
|
||||||
cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir))
|
cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir))
|
||||||
|
|
||||||
# Initialize project configuration
|
# Initialize project configuration and centralized paths
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source("parameters_project.R")
|
source("parameters_project.R")
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
|
|
@ -152,49 +192,63 @@ main <- function() {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
# Define paths
|
# Load centralized path structure (creates all directories automatically)
|
||||||
ci_data_source_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
|
paths <- setup_project_directories(project_dir)
|
||||||
ci_data_output_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "ci_data_for_python")
|
|
||||||
|
|
||||||
# Create output directory if it doesn't exist (for new projects)
|
# Use centralized paths (no need for dir.create - already handled)
|
||||||
if (!dir.exists(ci_data_output_dir)) {
|
ci_data_source_dir <- paths$cumulative_ci_vals_dir
|
||||||
dir.create(ci_data_output_dir, recursive = TRUE, showWarnings = FALSE)
|
ci_data_output_dir <- paths$ci_for_python_dir
|
||||||
cat(sprintf("✓ Created output directory: %s\n", ci_data_output_dir))
|
|
||||||
}
|
|
||||||
|
|
||||||
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds")
|
# Try to load interpolated growth model data from Script 30
|
||||||
|
input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
||||||
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
|
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
|
||||||
|
|
||||||
# Check if input file exists
|
# Check if input file exists
|
||||||
if (!file.exists(input_file)) {
|
if (!file.exists(input_file)) {
|
||||||
stop(paste("Input file not found:", input_file))
|
stop(paste("Input file not found:", input_file,
|
||||||
|
"\nScript 30 (growth model) must be run before Script 21."))
|
||||||
}
|
}
|
||||||
|
|
||||||
cat(sprintf("Loading: %s\n", input_file))
|
cat(sprintf("Loading: %s\n", input_file))
|
||||||
|
|
||||||
# Load RDS file
|
# Load RDS file (from Script 30 - already in long format with interpolated values)
|
||||||
ci_data_wide <- readRDS(input_file) %>%
|
ci_data <- readRDS(input_file) %>%
|
||||||
as_tibble()
|
as_tibble()
|
||||||
|
|
||||||
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide)))
|
cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
|
||||||
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n"))
|
cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
|
||||||
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
|
|
||||||
|
|
||||||
# Step 1: Convert from WIDE to LONG format
|
# Check format and prepare for export
|
||||||
cat("\nStep 1: Converting from wide to long format...\n")
|
# If it's already in long format (from Script 30), use as-is
|
||||||
ci_data_long <- wide_to_long_ci_data(ci_data_wide)
|
# Otherwise, convert from wide to long
|
||||||
|
if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) {
|
||||||
|
cat(" Detected: LONG format (from growth model)\n")
|
||||||
|
ci_data_long <- ci_data
|
||||||
|
} else {
|
||||||
|
cat(" Detected: WIDE format - converting to long...\n")
|
||||||
|
ci_data_long <- wide_to_long_ci_data(ci_data)
|
||||||
|
}
|
||||||
|
|
||||||
# Step 2: Create complete daily sequences with interpolation
|
# Step 1: Ensure Date column exists and is properly formatted
|
||||||
|
ci_data_long <- ci_data_long %>%
|
||||||
|
mutate(
|
||||||
|
Date = as.Date(Date)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Step 2: If interpolated values already present, use them; otherwise create interpolated sequences
|
||||||
|
if ("value" %in% names(ci_data_long)) {
|
||||||
|
# Already has interpolated values from Script 30
|
||||||
|
cat("\nStep 2: Using interpolated values from growth model...\n")
|
||||||
|
ci_data_python <- ci_data_long
|
||||||
|
} else {
|
||||||
|
# Create interpolated daily sequences
|
||||||
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
|
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
|
||||||
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
|
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
|
||||||
|
}
|
||||||
# Step 3: Validate output
|
|
||||||
cat("\nStep 3: Validating output...")
|
|
||||||
validate_conversion_output(ci_data_python)
|
|
||||||
|
|
||||||
# Step 4: Save to CSV
|
# Step 4: Save to CSV
|
||||||
cat(sprintf("\nStep 4: Saving to CSV...\n"))
|
cat(sprintf("\nStep 4: Saving to CSV...\\n"))
|
||||||
cat(sprintf(" Output: %s\n", output_file))
|
cat(sprintf(" Output: %s\\n", output_file))
|
||||||
write_csv(ci_data_python, output_file)
|
write_csv(ci_data_python, output_file)
|
||||||
|
|
||||||
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
|
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
|
||||||
|
|
|
||||||
|
|
@ -4,57 +4,143 @@
|
||||||
# ===================
|
# ===================
|
||||||
# Utility functions for growth model interpolation and manipulation.
|
# Utility functions for growth model interpolation and manipulation.
|
||||||
# These functions support the creation of continuous growth models from point measurements.
|
# These functions support the creation of continuous growth models from point measurements.
|
||||||
|
#
|
||||||
|
# PERFORMANCE OPTIMIZATION:
|
||||||
|
# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr()
|
||||||
|
# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields)
|
||||||
|
# - Dynamic CPU detection: Allocates workers based on available cores
|
||||||
|
# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support
|
||||||
|
|
||||||
#' Safe logging function that works whether log_message exists or not
|
#' Load and prepare the combined CI data (Per-Field Architecture)
|
||||||
|
#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges)
|
||||||
|
#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr()
|
||||||
#'
|
#'
|
||||||
#' @param message The message to log
|
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
|
||||||
#' @param level The log level (default: "INFO")
|
#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup)
|
||||||
#' @return NULL (used for side effects)
|
#' @return Long-format dataframe with CI values by date and field
|
||||||
#'
|
#'
|
||||||
safe_log <- function(message, level = "INFO") {
|
load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
|
||||||
if (exists("log_message")) {
|
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
|
||||||
log_message(message, level)
|
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
||||||
|
|
||||||
|
if (!dir.exists(daily_vals_dir)) {
|
||||||
|
stop(paste("Daily values directory not found:", daily_vals_dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
|
||||||
|
|
||||||
|
# OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates
|
||||||
|
date_filter_min <- NULL
|
||||||
|
date_filter_max <- NULL
|
||||||
|
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
|
||||||
|
date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE)
|
||||||
|
date_filter_max <- max(harvesting_data$season_end, na.rm = TRUE)
|
||||||
|
safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s",
|
||||||
|
format(date_filter_min, "%Y-%m-%d"),
|
||||||
|
format(date_filter_max, "%Y-%m-%d")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Find all daily RDS files recursively (per-field structure)
|
||||||
|
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
|
||||||
|
all_daily_files <- list.files(
|
||||||
|
path = daily_vals_dir,
|
||||||
|
pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$", # Only YYYY-MM-DD.rds format
|
||||||
|
full.names = TRUE,
|
||||||
|
recursive = TRUE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Further filter: only keep files that are in a subdirectory (per-field structure)
|
||||||
|
# Exclude legacy files at the root level like "extracted_2024-02-29_whole_field.rds"
|
||||||
|
all_daily_files <- all_daily_files[basename(dirname(all_daily_files)) != "daily_vals"]
|
||||||
|
|
||||||
|
if (length(all_daily_files) == 0) {
|
||||||
|
stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files)))
|
||||||
|
|
||||||
|
# OPTIMIZATION: Filter files by filename date BEFORE parallel loading
|
||||||
|
# Skip files outside harvest season (can save 60-80% of I/O on large datasets)
|
||||||
|
if (!is.null(date_filter_min) && !is.null(date_filter_max)) {
|
||||||
|
all_daily_files <- all_daily_files[
|
||||||
|
{
|
||||||
|
dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d")
|
||||||
|
!is.na(dates) & dates >= date_filter_min & dates <= date_filter_max
|
||||||
|
}
|
||||||
|
]
|
||||||
|
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
|
||||||
|
# Automatically detect available cores and limit to reasonable number
|
||||||
|
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
|
||||||
|
future::plan(strategy = future::multisession, workers = n_cores)
|
||||||
|
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
|
||||||
|
|
||||||
|
# Parallel file reading: future_map_dfr processes each file in parallel
|
||||||
|
# Returns combined dataframe directly (no need to rbind)
|
||||||
|
combined_long <- furrr::future_map_dfr(
|
||||||
|
all_daily_files,
|
||||||
|
.progress = TRUE,
|
||||||
|
.options = furrr::furrr_options(seed = TRUE),
|
||||||
|
function(file) {
|
||||||
|
# Extract date from filename: {YYYY-MM-DD}.rds
|
||||||
|
filename <- basename(file)
|
||||||
|
date_str <- tools::file_path_sans_ext(filename)
|
||||||
|
|
||||||
|
# Parse date
|
||||||
|
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
|
||||||
|
parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
|
||||||
} else {
|
} else {
|
||||||
if (level %in% c("ERROR", "WARNING")) {
|
return(data.frame()) # Return empty dataframe if parse fails
|
||||||
warning(message)
|
|
||||||
} else {
|
|
||||||
message(message)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Load and prepare the combined CI data
|
if (is.na(parsed_date)) {
|
||||||
#'
|
return(data.frame())
|
||||||
#' @param data_dir Directory containing the combined CI data
|
|
||||||
#' @return Long-format dataframe with CI values by date
|
|
||||||
#'
|
|
||||||
load_combined_ci_data <- function(data_dir) {
|
|
||||||
file_path <- here::here(data_dir, "combined_CI_data.rds")
|
|
||||||
|
|
||||||
if (!file.exists(file_path)) {
|
|
||||||
stop(paste("Combined CI data file not found:", file_path))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
safe_log(paste("Loading CI data from:", file_path))
|
# Read RDS file
|
||||||
|
tryCatch({
|
||||||
|
rds_data <- readRDS(file)
|
||||||
|
|
||||||
# Load and transform the data to long format
|
if (is.null(rds_data) || nrow(rds_data) == 0) {
|
||||||
pivot_stats <- readRDS(file_path) %>%
|
return(data.frame())
|
||||||
dplyr::ungroup() %>%
|
}
|
||||||
dplyr::group_by(field, sub_field) %>%
|
|
||||||
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
|
|
||||||
|
|
||||||
pivot_stats_long <- pivot_stats %>%
|
# Add date column to the data
|
||||||
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
|
rds_data %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(Date = parsed_date)
|
||||||
Date = lubridate::ymd(Date),
|
|
||||||
value = as.numeric(value)
|
|
||||||
) %>%
|
|
||||||
tidyr::drop_na(c("value", "Date")) %>%
|
|
||||||
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
|
|
||||||
dplyr::filter(!is.infinite(value)) %>%
|
|
||||||
dplyr::distinct()
|
|
||||||
|
|
||||||
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
|
}, error = function(e) {
|
||||||
|
return(data.frame()) # Return empty dataframe on error
|
||||||
|
})
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Return to sequential processing to avoid nested parallelism
|
||||||
|
future::plan(future::sequential)
|
||||||
|
|
||||||
|
if (nrow(combined_long) == 0) {
|
||||||
|
safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
|
||||||
|
return(data.frame())
|
||||||
|
}
|
||||||
|
|
||||||
|
# OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets)
|
||||||
|
# Reshape to long format using ci_mean as the main CI value
|
||||||
|
DT <- data.table::as.data.table(combined_long)
|
||||||
|
DT <- DT[, .(field, sub_field, ci_mean, Date)]
|
||||||
|
DT[, c("value") := list(as.numeric(ci_mean))]
|
||||||
|
DT[, ci_mean := NULL]
|
||||||
|
|
||||||
|
# Fast filtering without .distinct() (which is slow on large datasets)
|
||||||
|
# Keep rows where Date is valid, field/sub_field exist, and value is finite
|
||||||
|
DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)]
|
||||||
|
|
||||||
|
# Convert back to tibble for compatibility with rest of pipeline
|
||||||
|
pivot_stats_long <- dplyr::as_tibble(DT)
|
||||||
|
|
||||||
|
safe_log(sprintf("Loaded %d CI data points from %d daily files",
|
||||||
|
nrow(pivot_stats_long), length(all_daily_files)))
|
||||||
|
|
||||||
return(pivot_stats_long)
|
return(pivot_stats_long)
|
||||||
}
|
}
|
||||||
|
|
@ -65,15 +151,16 @@ load_combined_ci_data <- function(data_dir) {
|
||||||
#' @param harvesting_data Dataframe with harvesting information
|
#' @param harvesting_data Dataframe with harvesting information
|
||||||
#' @param field_CI_data Dataframe with CI measurements
|
#' @param field_CI_data Dataframe with CI measurements
|
||||||
#' @param season Year of the growing season
|
#' @param season Year of the growing season
|
||||||
|
#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration.
|
||||||
#' @return Dataframe with interpolated daily CI values
|
#' @return Dataframe with interpolated daily CI values
|
||||||
#'
|
#'
|
||||||
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) {
|
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) {
|
||||||
# Filter harvesting data for the given season and field name
|
# Filter harvesting data for the given season and field name
|
||||||
filtered_harvesting_data <- harvesting_data %>%
|
filtered_harvesting_data <- harvesting_data %>%
|
||||||
dplyr::filter(year == season, sub_field == field_name)
|
dplyr::filter(year == season, sub_field == field_name)
|
||||||
|
|
||||||
if (nrow(filtered_harvesting_data) == 0) {
|
if (nrow(filtered_harvesting_data) == 0) {
|
||||||
safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
|
if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -83,7 +170,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
|
|
||||||
# Return an empty data frame if no CI data is found
|
# Return an empty data frame if no CI data is found
|
||||||
if (nrow(filtered_field_CI_data) == 0) {
|
if (nrow(filtered_field_CI_data) == 0) {
|
||||||
safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
|
if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,12 +194,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
|
|
||||||
# If CI is empty after filtering, return an empty dataframe
|
# If CI is empty after filtering, return an empty dataframe
|
||||||
if (nrow(CI) == 0) {
|
if (nrow(CI) == 0) {
|
||||||
|
if (verbose) {
|
||||||
safe_log(paste0("No CI data within season dates for field: ", field_name,
|
safe_log(paste0("No CI data within season dates for field: ", field_name,
|
||||||
" (Season: ", season, ", dates: ",
|
" (Season: ", season, ", dates: ",
|
||||||
format(season_start, "%Y-%m-%d"), " to ",
|
format(season_start, "%Y-%m-%d"), " to ",
|
||||||
format(season_end, "%Y-%m-%d"),
|
format(season_end, "%Y-%m-%d"),
|
||||||
"). Available CI data range: ", ci_date_range),
|
"). Available CI data range: ", ci_date_range),
|
||||||
"WARNING")
|
"WARNING")
|
||||||
|
}
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -125,25 +214,23 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
subField = field_name
|
subField = field_name
|
||||||
)
|
)
|
||||||
|
|
||||||
# Log successful interpolation
|
# Return data with success status
|
||||||
safe_log(paste0("Successfully interpolated CI data for field: ", field_name,
|
|
||||||
" (Season: ", season, ", dates: ",
|
|
||||||
format(season_start, "%Y-%m-%d"), " to ",
|
|
||||||
format(season_end, "%Y-%m-%d"),
|
|
||||||
"). ", nrow(CI), " data points created."))
|
|
||||||
|
|
||||||
return(CI)
|
return(CI)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
|
# Return empty dataframe on error (will be tracked separately)
|
||||||
|
if (verbose) {
|
||||||
safe_log(paste0("Error interpolating CI data for field ", field_name,
|
safe_log(paste0("Error interpolating CI data for field ", field_name,
|
||||||
" in season ", season,
|
" in season ", season,
|
||||||
" (", format(season_start, "%Y-%m-%d"), " to ",
|
" (", format(season_start, "%Y-%m-%d"), " to ",
|
||||||
format(season_end, "%Y-%m-%d"),
|
format(season_end, "%Y-%m-%d"),
|
||||||
"): ", e$message), "ERROR")
|
"): ", e$message), "ERROR")
|
||||||
|
}
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Generate interpolated CI data for all fields and seasons
|
#' Generate interpolated CI data for all fields and seasons
|
||||||
|
#' PARALLELIZE: Processes fields in parallel using furrr::future_map_df()
|
||||||
#'
|
#'
|
||||||
#' @param years Vector of years to process
|
#' @param years Vector of years to process
|
||||||
#' @param harvesting_data Dataframe with harvesting information
|
#' @param harvesting_data Dataframe with harvesting information
|
||||||
|
|
@ -153,17 +240,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
||||||
safe_log("Starting CI data interpolation for all fields")
|
safe_log("Starting CI data interpolation for all fields")
|
||||||
|
|
||||||
|
# Track failed fields for end-of-run summary
|
||||||
|
failed_fields <- list()
|
||||||
|
total_fields <- 0
|
||||||
|
successful_fields <- 0
|
||||||
|
|
||||||
# Process each year
|
# Process each year
|
||||||
result <- purrr::map_df(years, function(yr) {
|
result <- purrr::map_df(years, function(yr) {
|
||||||
safe_log(paste("Processing year:", yr))
|
|
||||||
|
|
||||||
# Get the fields harvested in this year with valid season start dates
|
# Get the fields harvested in this year with valid season start dates
|
||||||
sub_fields <- harvesting_data %>%
|
sub_fields <- harvesting_data %>%
|
||||||
dplyr::filter(year == yr, !is.na(season_start)) %>%
|
dplyr::filter(year == yr, !is.na(season_start)) %>%
|
||||||
dplyr::pull(sub_field)
|
dplyr::pull(sub_field)
|
||||||
|
|
||||||
if (length(sub_fields) == 0) {
|
if (length(sub_fields) == 0) {
|
||||||
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
|
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -172,24 +261,74 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
||||||
purrr::keep(~ any(ci_data$sub_field == .x))
|
purrr::keep(~ any(ci_data$sub_field == .x))
|
||||||
|
|
||||||
if (length(valid_sub_fields) == 0) {
|
if (length(valid_sub_fields) == 0) {
|
||||||
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
|
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract and interpolate data for each valid field
|
total_fields <<- total_fields + length(valid_sub_fields)
|
||||||
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
|
safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
|
||||||
|
|
||||||
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
|
# Set up parallel future plan for field interpolation
|
||||||
|
# Allocate 1 core per ~100 fields (with minimum 2 cores)
|
||||||
|
n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
|
||||||
|
future::plan(strategy = future::multisession, workers = n_cores)
|
||||||
|
|
||||||
|
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
|
||||||
|
result_list <- furrr::future_map(
|
||||||
|
valid_sub_fields,
|
||||||
|
.progress = TRUE,
|
||||||
|
.options = furrr::furrr_options(seed = TRUE),
|
||||||
|
function(field) {
|
||||||
|
# Call with verbose=FALSE to suppress warnings during parallel iteration
|
||||||
|
extract_CI_data(field,
|
||||||
harvesting_data = harvesting_data,
|
harvesting_data = harvesting_data,
|
||||||
field_CI_data = ci_data,
|
field_CI_data = ci_data,
|
||||||
season = yr)) %>%
|
season = yr,
|
||||||
purrr::list_rbind()
|
verbose = FALSE)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
|
# Return to sequential processing
|
||||||
return(result)
|
future::plan(future::sequential)
|
||||||
|
|
||||||
|
# Process results and tracking
|
||||||
|
for (i in seq_along(result_list)) {
|
||||||
|
field_result <- result_list[[i]]
|
||||||
|
field_name <- valid_sub_fields[i]
|
||||||
|
|
||||||
|
if (nrow(field_result) > 0) {
|
||||||
|
successful_fields <<- successful_fields + 1
|
||||||
|
} else {
|
||||||
|
failed_fields[[length(failed_fields) + 1]] <<- list(
|
||||||
|
field = field_name,
|
||||||
|
season = yr,
|
||||||
|
reason = "Unable to generate interpolated data"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Combine all results for this year
|
||||||
|
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
|
||||||
|
if (length(result_list) > 0) {
|
||||||
|
purrr::list_rbind(result_list)
|
||||||
|
} else {
|
||||||
|
data.frame()
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
safe_log(paste("Total interpolated data points:", nrow(result)))
|
# Print summary
|
||||||
|
safe_log(sprintf("\n=== Interpolation Summary ==="))
|
||||||
|
safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields))
|
||||||
|
|
||||||
|
if (length(failed_fields) > 0) {
|
||||||
|
safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields)))
|
||||||
|
for (failure in failed_fields) {
|
||||||
|
safe_log(sprintf(" - Field %s (Season %d): %s",
|
||||||
|
failure$field, failure$season, failure$reason), "WARNING")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("Total interpolated data points: %d", nrow(result)))
|
||||||
|
|
||||||
return(result)
|
return(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -222,11 +361,19 @@ calculate_growth_metrics <- function(interpolated_data) {
|
||||||
#' @return Path to the saved file
|
#' @return Path to the saved file
|
||||||
#'
|
#'
|
||||||
save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") {
|
save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") {
|
||||||
|
# Validate input
|
||||||
|
if (is.null(output_dir) || !is.character(output_dir) || length(output_dir) == 0) {
|
||||||
|
stop("output_dir must be a non-empty character string")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Normalize path separators for Windows compatibility
|
||||||
|
output_dir <- normalizePath(output_dir, winslash = "/", mustWork = FALSE)
|
||||||
|
|
||||||
# Create output directory if it doesn't exist
|
# Create output directory if it doesn't exist
|
||||||
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
# Create full file path
|
# Create full file path using file.path (more robust than here::here for absolute paths)
|
||||||
file_path <- here::here(output_dir, file_name)
|
file_path <- file.path(output_dir, file_name)
|
||||||
|
|
||||||
# Save the data
|
# Save the data
|
||||||
saveRDS(data, file_path)
|
saveRDS(data, file_path)
|
||||||
|
|
|
||||||
|
|
@ -1,71 +1,142 @@
|
||||||
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\interpolate_growth_model.R
|
# ============================================================================
|
||||||
|
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
|
||||||
|
# ============================================================================
|
||||||
|
# PURPOSE:
|
||||||
|
# Interpolate Canopy Index (CI) values across time to create continuous
|
||||||
|
# growth curves. Fills gaps in measurement dates, applies smoothing via
|
||||||
|
# LOESS, and generates daily CI estimates and cumulative statistics for
|
||||||
|
# each field. Enables downstream yield prediction and trend analysis.
|
||||||
#
|
#
|
||||||
# INTERPOLATE_GROWTH_MODEL.R
|
# INPUT DATA:
|
||||||
# =========================
|
# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
|
||||||
# This script interpolates CI (Chlorophyll Index) values between measurement dates
|
# - Format: RDS (wide format: fields × dates with CI values)
|
||||||
# to create a continuous growth model. It generates daily values and cumulative
|
# - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
|
||||||
# CI statistics for each field.
|
|
||||||
#
|
#
|
||||||
# Usage: Rscript interpolate_growth_model.R [project_dir]
|
# OUTPUT DATA:
|
||||||
# - project_dir: Project directory name (e.g., "chemba")
|
# - Destination: laravel_app/storage/app/{project}/interpolated_ci/
|
||||||
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata
|
# - Format: RDS files per field (daily CI estimates)
|
||||||
|
# - Also exports: Growth model curves as RDS (cumulative CI, daily values)
|
||||||
|
#
|
||||||
|
# USAGE:
|
||||||
|
# Rscript 30_interpolate_growth_model.R [project]
|
||||||
|
#
|
||||||
|
# Example (Windows PowerShell):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# CLIENT TYPES:
|
||||||
|
# - cane_supply (ANGATA): Yes - core growth monitoring
|
||||||
|
# - agronomic_support (AURA): Yes - field health trend analysis
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - Packages: tidyverse, lubridate
|
||||||
|
# - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
|
||||||
|
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
|
||||||
|
# - Input data: combined_CI_data.rds from Script 20
|
||||||
|
# - Data directories: interpolated_ci/ (created if missing)
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
|
||||||
|
# - Gap-filling: Assumes continuous growth between measurements; skips clouds
|
||||||
|
# - Cumulative CI: Sum of daily interpolated values from planting to current date
|
||||||
|
# - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
|
||||||
|
# - Critical for 8-week CV trend calculation and 4-week growth categorization
|
||||||
|
#
|
||||||
|
# RELATED ISSUES:
|
||||||
|
# SC-112: Utilities restructuring
|
||||||
|
# SC-108: Core pipeline improvements
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
# 1. Load required packages
|
# 1. Load required packages
|
||||||
# -----------------------
|
# -----------------------
|
||||||
suppressPackageStartupMessages({
|
suppressPackageStartupMessages({
|
||||||
library(tidyverse)
|
# File path handling
|
||||||
library(lubridate)
|
library(here) # For relative path resolution (platform-independent file paths)
|
||||||
library(here)
|
|
||||||
|
# Data manipulation
|
||||||
|
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
|
||||||
|
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
|
||||||
|
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
|
||||||
|
|
||||||
|
# Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
|
||||||
|
library(future) # For setting up parallel execution plans
|
||||||
|
library(furrr) # For future_map_dfr (parallel file I/O and field processing)
|
||||||
|
library(parallel) # For detectCores (automatic CPU detection)
|
||||||
|
library(data.table) # For fast filtering on large datasets
|
||||||
})
|
})
|
||||||
|
|
||||||
# 2. Main function to handle interpolation
|
# =============================================================================
|
||||||
# -------------------------------------
|
# MAIN PROCESSING FUNCTION
|
||||||
main <- function() {
|
# =============================================================================
|
||||||
# Process command line arguments
|
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
|
||||||
|
|
||||||
# Get project directory from arguments or use default
|
main <- function() {
|
||||||
if (length(args) >= 1 && !is.na(args[1])) {
|
# STEP 1: Set working directory to project root (smartcane/)
|
||||||
project_dir <- as.character(args[1])
|
# This ensures all relative paths resolve correctly
|
||||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
if (basename(getwd()) == "r_app") {
|
||||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
setwd("..")
|
||||||
} else {
|
|
||||||
project_dir <- "esa"
|
|
||||||
message("No project_dir provided. Using default:", project_dir)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Make project_dir available globally so parameters_project.R can use it
|
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
|
||||||
|
# Parse command-line arguments FIRST
|
||||||
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
|
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||||
|
|
||||||
|
# Make project_dir available globally for parameters_project.R
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
|
|
||||||
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
|
# Load parameters_project.R (provides setup_project_directories, etc.)
|
||||||
ci_extraction_script <- TRUE
|
|
||||||
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
# Initialize project configuration and load utility functions
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source("parameters_project.R")
|
source("r_app/parameters_project.R")
|
||||||
source("30_growth_model_utils.R")
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
|
||||||
tryCatch({
|
stop(e)
|
||||||
source(here::here("r_app", "parameters_project.R"))
|
|
||||||
source(here::here("r_app", "30_growth_model_utils.R"))
|
|
||||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
|
|
||||||
log_message("Starting CI growth model interpolation")
|
# Load growth model utilities
|
||||||
|
tryCatch({
|
||||||
|
source("r_app/30_growth_model_utils.R")
|
||||||
|
}, error = function(e) {
|
||||||
|
cat(sprintf("Error loading 30_growth_model_utils.R: %s\n", e$message))
|
||||||
|
stop(e)
|
||||||
|
})
|
||||||
|
|
||||||
|
# STEP 3: Now all utilities are loaded, proceed with script logic
|
||||||
|
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
|
||||||
|
safe_log(sprintf("Project: %s", project_dir))
|
||||||
|
|
||||||
|
# Set up directory paths from parameters
|
||||||
|
setup <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
|
# For per-field architecture: read from daily_ci_vals_dir (Script 20 per-field output)
|
||||||
|
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||||
|
safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))
|
||||||
|
|
||||||
|
safe_log("Starting CI growth model interpolation")
|
||||||
|
|
||||||
|
# Set up data directory paths
|
||||||
|
data_dir <- setup$data_dir
|
||||||
|
|
||||||
# Load and process the data
|
# Load and process the data
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Load the combined CI data
|
# Load the combined CI data (created by Script 20 per-field)
|
||||||
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
|
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
||||||
|
# OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files)
|
||||||
|
harvesting_data <- tryCatch({
|
||||||
|
load_harvesting_data(data_dir)
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING")
|
||||||
|
NULL
|
||||||
|
})
|
||||||
|
|
||||||
|
# Load CI data with date range pre-filtering
|
||||||
|
CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)
|
||||||
|
|
||||||
# Validate harvesting data
|
# Validate harvesting data
|
||||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||||
|
safe_log("No harvesting data available", "ERROR")
|
||||||
stop("No harvesting data available")
|
stop("No harvesting data available")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -75,7 +146,7 @@ main <- function() {
|
||||||
distinct(year) %>%
|
distinct(year) %>%
|
||||||
pull(year)
|
pull(year)
|
||||||
|
|
||||||
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
|
safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||||
|
|
||||||
# Generate interpolated CI data for each year and field
|
# Generate interpolated CI data for each year and field
|
||||||
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
||||||
|
|
@ -89,20 +160,20 @@ main <- function() {
|
||||||
# Add daily and cumulative metrics
|
# Add daily and cumulative metrics
|
||||||
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
||||||
|
|
||||||
# Save the processed data
|
# Save the processed data to cumulative_vals directory
|
||||||
save_growth_model(
|
save_growth_model(
|
||||||
CI_all_with_metrics,
|
CI_all_with_metrics,
|
||||||
cumulative_CI_vals_dir,
|
setup$cumulative_ci_vals_dir,
|
||||||
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
log_message("No CI data was generated after interpolation", level = "WARNING")
|
safe_log("No CI data was generated after interpolation", "WARNING")
|
||||||
}
|
}
|
||||||
|
|
||||||
log_message("Growth model interpolation completed successfully")
|
safe_log("Growth model interpolation completed successfully")
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
|
safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
|
||||||
stop(e$message)
|
stop(e$message)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,215 +0,0 @@
|
||||||
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\mosaic_creation.R
|
|
||||||
#
|
|
||||||
# MOSAIC_CREATION.R
|
|
||||||
# ===============
|
|
||||||
# This script creates weekly mosaics from daily satellite imagery.
|
|
||||||
# It handles command-line arguments and initiates the mosaic creation process.
|
|
||||||
#
|
|
||||||
# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size]
|
|
||||||
# - end_date: End date for processing (YYYY-MM-DD format)
|
|
||||||
# - offset: Number of days to look back from end_date
|
|
||||||
# - project_dir: Project directory name (e.g., "chemba")
|
|
||||||
# - file_name: Optional custom output file name
|
|
||||||
# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE)
|
|
||||||
# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE)
|
|
||||||
#
|
|
||||||
# Examples:
|
|
||||||
|
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata
|
|
||||||
#
|
|
||||||
|
|
||||||
# 1. Load required packages
|
|
||||||
# -----------------------
|
|
||||||
suppressPackageStartupMessages({
|
|
||||||
library(sf)
|
|
||||||
library(terra)
|
|
||||||
library(tidyverse)
|
|
||||||
library(lubridate)
|
|
||||||
library(here)
|
|
||||||
})
|
|
||||||
|
|
||||||
# 2. Process command line arguments and run mosaic creation
|
|
||||||
# ------------------------------------------------------
|
|
||||||
main <- function() {
|
|
||||||
# Capture command line arguments
|
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
|
||||||
|
|
||||||
# Process project_dir argument with default
|
|
||||||
if (length(args) >= 3 && !is.na(args[3])) {
|
|
||||||
project_dir <- as.character(args[3])
|
|
||||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
|
||||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
|
||||||
} else {
|
|
||||||
# Default project directory
|
|
||||||
project_dir <- "angata"
|
|
||||||
message("No project_dir provided. Using default:", project_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Make project_dir available globally so parameters_project.R can use it
|
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
# Process end_date argument with default
|
|
||||||
if (length(args) >= 1 && !is.na(args[1])) {
|
|
||||||
end_date <- as.Date(args[1])
|
|
||||||
if (is.na(end_date)) {
|
|
||||||
message("Invalid end_date provided. Using current date.")
|
|
||||||
end_date <- Sys.Date()
|
|
||||||
}
|
|
||||||
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
|
||||||
end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
|
|
||||||
} else {
|
|
||||||
# Default to current date if no argument is provided
|
|
||||||
end_date <- Sys.Date()
|
|
||||||
message("No end_date provided. Using current date: ", format(end_date))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process offset argument with default
|
|
||||||
if (length(args) >= 2 && !is.na(args[2])) {
|
|
||||||
offset <- as.numeric(args[2])
|
|
||||||
if (is.na(offset) || offset <= 0) {
|
|
||||||
message("Invalid offset provided. Using default (7 days).")
|
|
||||||
offset <- 7
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
# Default to 7 days if no argument is provided
|
|
||||||
offset <- 7
|
|
||||||
message("No offset provided. Using default:", offset, "days")
|
|
||||||
}
|
|
||||||
|
|
||||||
# 3. Initialize project configuration
|
|
||||||
# --------------------------------
|
|
||||||
|
|
||||||
# Detect which data source directory exists (merged_tif or merged_tif_8b)
|
|
||||||
laravel_storage <- here::here("laravel_app/storage/app", project_dir)
|
|
||||||
data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) {
|
|
||||||
message("Detected data source: merged_tif_8b (8-band optimized)")
|
|
||||||
"merged_tif_8b"
|
|
||||||
} else if (dir.exists(file.path(laravel_storage, "merged_tif"))) {
|
|
||||||
message("Detected data source: merged_tif (legacy 4-band)")
|
|
||||||
"merged_tif"
|
|
||||||
} else {
|
|
||||||
message("Warning: No data source found. Using default: merged_tif_8b")
|
|
||||||
"merged_tif_8b"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Set global data_source for parameters_project.R
|
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
source("parameters_project.R")
|
|
||||||
source("40_mosaic_creation_utils.R")
|
|
||||||
safe_log(paste("Successfully sourced files from default directory."))
|
|
||||||
}, error = function(e) {
|
|
||||||
message("Note: Could not open files from default directory (expected on some systems)")
|
|
||||||
message("Attempting to source from 'r_app' directory instead...")
|
|
||||||
tryCatch({
|
|
||||||
source(here::here("r_app", "parameters_project.R"))
|
|
||||||
source(here::here("r_app", "40_mosaic_creation_utils.R"))
|
|
||||||
message("✓ Successfully sourced files from 'r_app' directory")
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
# 4. Generate date range for processing
|
|
||||||
# ---------------------------------
|
|
||||||
dates <- date_list(end_date, offset)
|
|
||||||
safe_log(paste("Processing data for week", dates$week, "of", dates$year))
|
|
||||||
|
|
||||||
# Create output filename
|
|
||||||
file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
|
|
||||||
as.character(args[4])
|
|
||||||
} else {
|
|
||||||
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
|
|
||||||
}
|
|
||||||
|
|
||||||
safe_log(paste("Output will be saved as:", file_name_tif))
|
|
||||||
|
|
||||||
# 5. Create weekly mosaics - route based on project tile detection
|
|
||||||
# ---------------------------------------------------------------
|
|
||||||
# The use_tile_mosaic flag is auto-detected by parameters_project.R
|
|
||||||
# based on whether tiles exist in merged_final_tif/
|
|
||||||
|
|
||||||
if (!exists("use_tile_mosaic")) {
|
|
||||||
# Fallback detection if flag not set (shouldn't happen)
|
|
||||||
merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
|
|
||||||
tile_detection <- detect_mosaic_mode(merged_final_dir)
|
|
||||||
use_tile_mosaic <- tile_detection$has_tiles
|
|
||||||
}
|
|
||||||
|
|
||||||
if (use_tile_mosaic) {
|
|
||||||
# TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
|
|
||||||
# This is used for projects like Angata with large ROIs requiring spatial partitioning
|
|
||||||
# Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20)
|
|
||||||
tryCatch({
|
|
||||||
safe_log("Starting per-tile mosaic creation (tile-based approach)...")
|
|
||||||
|
|
||||||
# Detect grid size from merged_final_tif folder structure
|
|
||||||
# Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc.
|
|
||||||
merged_final_base <- file.path(laravel_storage, "merged_final_tif")
|
|
||||||
grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE)
|
|
||||||
# Look for grid size patterns like "5x5", "10x10", "20x20"
|
|
||||||
grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
|
|
||||||
|
|
||||||
if (length(grid_patterns) == 0) {
|
|
||||||
stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)")
|
|
||||||
}
|
|
||||||
|
|
||||||
grid_size <- grid_patterns[1] # Use first grid size found
|
|
||||||
safe_log(paste("Detected grid size:", grid_size))
|
|
||||||
|
|
||||||
# Point to the grid-specific merged_final_tif directory
|
|
||||||
merged_final_with_grid <- file.path(merged_final_base, grid_size)
|
|
||||||
|
|
||||||
# Set output directory for per-tile mosaics, organized by grid size
|
|
||||||
# Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif
|
|
||||||
tile_output_base <- file.path(laravel_storage, "weekly_tile_max", grid_size)
|
|
||||||
dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE)
|
|
||||||
|
|
||||||
created_tile_files <- create_weekly_mosaic_from_tiles(
|
|
||||||
dates = dates,
|
|
||||||
merged_final_dir = merged_final_with_grid,
|
|
||||||
tile_output_dir = tile_output_base,
|
|
||||||
field_boundaries = field_boundaries
|
|
||||||
)
|
|
||||||
|
|
||||||
safe_log(paste("✓ Per-tile mosaic creation completed - created",
|
|
||||||
length(created_tile_files), "tile files"))
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
|
|
||||||
traceback()
|
|
||||||
stop("Mosaic creation failed")
|
|
||||||
})
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# SINGLE-FILE APPROACH: Create single weekly mosaic file
|
|
||||||
# This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output
|
|
||||||
tryCatch({
|
|
||||||
safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
|
|
||||||
|
|
||||||
# Set output directory for single-file mosaics
|
|
||||||
single_file_output_dir <- file.path(laravel_storage, "weekly_mosaic")
|
|
||||||
|
|
||||||
created_file <- create_weekly_mosaic(
|
|
||||||
dates = dates,
|
|
||||||
field_boundaries = field_boundaries,
|
|
||||||
daily_vrt_dir = daily_vrt,
|
|
||||||
merged_final_dir = merged_final,
|
|
||||||
output_dir = single_file_output_dir,
|
|
||||||
file_name_tif = file_name_tif,
|
|
||||||
create_plots = FALSE
|
|
||||||
)
|
|
||||||
|
|
||||||
safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
|
|
||||||
traceback()
|
|
||||||
stop("Mosaic creation failed")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sys.nframe() == 0) {
|
|
||||||
main()
|
|
||||||
}
|
|
||||||
|
|
||||||
209
r_app/40_mosaic_creation_per_field.R
Normal file
209
r_app/40_mosaic_creation_per_field.R
Normal file
|
|
@ -0,0 +1,209 @@
|
||||||
|
# ============================================================================
|
||||||
|
# SCRIPT 40: Weekly Mosaic Creation (Per-Field CI Aggregation)
|
||||||
|
# ============================================================================
|
||||||
|
# PURPOSE:
|
||||||
|
# Aggregate daily per-field CI TIFFs into weekly mosaics. Handles multi-date
|
||||||
|
# merging, cloud masking, and produces 5-band weekly output for reporting
|
||||||
|
# and KPI calculations. Supports both per-field and grid-based tile architecture.
|
||||||
|
#
|
||||||
|
# INPUT DATA:
|
||||||
|
# - Source: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif
|
||||||
|
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32)
|
||||||
|
# - Dates: All available dates within week range
|
||||||
|
#
|
||||||
|
# OUTPUT DATA:
|
||||||
|
# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/
|
||||||
|
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI)
|
||||||
|
# - Naming: week_WW_YYYY.tif (WW = ISO week, YYYY = ISO year)
|
||||||
|
#
|
||||||
|
# USAGE:
|
||||||
|
# Rscript 40_mosaic_creation_per_field.R [end_date] [offset] [project]
|
||||||
|
#
|
||||||
|
# Example (Windows PowerShell):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-01-12 7 angata
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# - end_date: End date for processing (character, YYYY-MM-DD format, default today)
|
||||||
|
# - offset: Days to look back (numeric, default 7 for one week)
|
||||||
|
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# CLIENT TYPES:
|
||||||
|
# - cane_supply (ANGATA): Yes - weekly monitoring
|
||||||
|
# - agronomic_support (AURA): Yes - field health reporting
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - Packages: terra, sf, tidyverse, lubridate
|
||||||
|
# - Utils files: parameters_project.R, 40_mosaic_creation_per_field_utils.R
|
||||||
|
# - Input data: Daily per-field CI TIFFs from Script 20
|
||||||
|
# - Data directories: field_tiles_CI/, weekly_mosaic/
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# - Aggregation method: Maximum CI value per pixel across week (handles clouds)
|
||||||
|
# - ISO week-year used for consistent date handling across year boundaries
|
||||||
|
# - Supports both single-file and tiled mosaic architectures
|
||||||
|
# - Output feeds Scripts 80 (KPI calculations) and 90/91 (reporting)
|
||||||
|
# - Critical for trend analysis: week-over-week CI comparison
|
||||||
|
#
|
||||||
|
# RELATED ISSUES:
|
||||||
|
# SC-112: Script 40 cleanup (deleted legacy mosaic utils files)
|
||||||
|
# SC-108: Core pipeline improvements
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# 1. Load required packages
|
||||||
|
# -----------------------
|
||||||
|
suppressPackageStartupMessages({
|
||||||
|
library(sf)
|
||||||
|
library(terra)
|
||||||
|
library(tidyverse)
|
||||||
|
library(lubridate)
|
||||||
|
library(here)
|
||||||
|
})
|
||||||
|
|
||||||
|
# 2. Main execution function
|
||||||
|
# -------------------------
|
||||||
|
main <- function() {
|
||||||
|
|
||||||
|
cat("\n")
|
||||||
|
cat("========================================================\n")
|
||||||
|
cat(" Script 40: Per-Field Weekly Mosaic Creation\n")
|
||||||
|
cat("========================================================\n\n")
|
||||||
|
|
||||||
|
# Capture command line arguments
|
||||||
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
|
|
||||||
|
# ==== Process Arguments ====
|
||||||
|
|
||||||
|
# Project directory
|
||||||
|
if (length(args) >= 3 && !is.na(args[3])) {
|
||||||
|
project_dir <- as.character(args[3])
|
||||||
|
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||||||
|
project_dir <- get("project_dir", envir = .GlobalEnv)
|
||||||
|
} else {
|
||||||
|
project_dir <- "angata"
|
||||||
|
message("[INFO] No project_dir provided. Using default: angata")
|
||||||
|
}
|
||||||
|
|
||||||
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
|
message(paste("[INFO] Project:", project_dir))
|
||||||
|
|
||||||
|
# End date
|
||||||
|
if (length(args) >= 1 && !is.na(args[1])) {
|
||||||
|
end_date <- as.Date(args[1], format = "%Y-%m-%d")
|
||||||
|
if (is.na(end_date)) {
|
||||||
|
message("[WARNING] Invalid end_date. Using current date.")
|
||||||
|
end_date <- Sys.Date()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
end_date <- Sys.Date()
|
||||||
|
message(paste("[INFO] No end_date provided. Using current date:", format(end_date)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Offset (days back)
|
||||||
|
if (length(args) >= 2 && !is.na(args[2])) {
|
||||||
|
offset <- as.numeric(args[2])
|
||||||
|
if (is.na(offset) || offset <= 0) {
|
||||||
|
message("[WARNING] Invalid offset. Using default: 7 days")
|
||||||
|
offset <- 7
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
offset <- 7
|
||||||
|
message("[INFO] No offset provided. Using default: 7 days")
|
||||||
|
}
|
||||||
|
|
||||||
|
# ==== Load Configuration ====
|
||||||
|
|
||||||
|
# Set working directory if needed
|
||||||
|
if (basename(getwd()) == "r_app") {
|
||||||
|
setwd("..")
|
||||||
|
}
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
source("r_app/parameters_project.R")
|
||||||
|
message("[INFO] ✓ Loaded parameters_project.R")
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("[ERROR] Failed to load parameters_project.R: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
source("r_app/40_mosaic_creation_per_field_utils.R")
|
||||||
|
message("[INFO] ✓ Loaded 40_mosaic_creation_per_field_utils.R")
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("[ERROR] Failed to load utilities: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
|
# ==== Get Project Directories ====
|
||||||
|
|
||||||
|
setup <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
|
# Determine input/output directories
|
||||||
|
# Input: field_tiles_CI/ (from Script 20)
|
||||||
|
field_tiles_ci_dir <- setup$field_tiles_ci_dir
|
||||||
|
|
||||||
|
# Output: weekly_mosaic/ (for Scripts 90/91)
|
||||||
|
weekly_mosaic_output_dir <- file.path(setup$laravel_storage_dir, "weekly_mosaic")
|
||||||
|
|
||||||
|
message(paste("[INFO] Input directory:", field_tiles_ci_dir))
|
||||||
|
message(paste("[INFO] Output directory:", weekly_mosaic_output_dir))
|
||||||
|
|
||||||
|
# ==== Validate Input Directory ====
|
||||||
|
|
||||||
|
if (!dir.exists(field_tiles_ci_dir)) {
|
||||||
|
stop(paste("[ERROR] Input directory not found:", field_tiles_ci_dir,
|
||||||
|
"\nScript 20 (CI extraction) must be run first to create per-field TIFFs."))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if directory has any TIFFs
|
||||||
|
field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
|
||||||
|
if (length(field_dirs) == 0) {
|
||||||
|
stop(paste("[ERROR] No field subdirectories found in:", field_tiles_ci_dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
message(paste("[INFO] Found", length(field_dirs), "field directories"))
|
||||||
|
|
||||||
|
# ==== Generate Date Range ====
|
||||||
|
|
||||||
|
dates <- date_list(end_date, offset)
|
||||||
|
|
||||||
|
# Validate week calculation
|
||||||
|
message(sprintf("[INFO] Requested offset: %d days", offset))
|
||||||
|
message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d")))
|
||||||
|
message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d")))
|
||||||
|
message(sprintf("[INFO] Calculating ISO week: %d", dates$week))
|
||||||
|
message(sprintf("[INFO] Calculating ISO year: %d", dates$year))
|
||||||
|
|
||||||
|
# ==== Create Per-Field Weekly Mosaics ====
|
||||||
|
|
||||||
|
created_files <- create_all_field_weekly_mosaics(
|
||||||
|
dates = dates,
|
||||||
|
field_tiles_ci_dir = field_tiles_ci_dir,
|
||||||
|
output_dir = weekly_mosaic_output_dir
|
||||||
|
)
|
||||||
|
|
||||||
|
# ==== Summary ====
|
||||||
|
|
||||||
|
message("\n")
|
||||||
|
message("========================================================")
|
||||||
|
message(paste(" COMPLETED"))
|
||||||
|
message(paste(" Created:", length(created_files), "weekly field mosaics"))
|
||||||
|
message("========================================================\n")
|
||||||
|
|
||||||
|
if (length(created_files) > 0) {
|
||||||
|
message("[SUCCESS] Weekly mosaics ready for reporting (Scripts 90/91)")
|
||||||
|
} else {
|
||||||
|
message("[WARNING] No mosaics created - check input data")
|
||||||
|
}
|
||||||
|
|
||||||
|
return(invisible(created_files))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Execute main if script is run directly
|
||||||
|
if (sys.nframe() == 0) {
|
||||||
|
tryCatch({
|
||||||
|
created <- main()
|
||||||
|
quit(save = "no", status = 0)
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste("\n[FATAL ERROR]", e$message))
|
||||||
|
quit(save = "no", status = 1)
|
||||||
|
})
|
||||||
|
}
|
||||||
268
r_app/40_mosaic_creation_per_field_utils.R
Normal file
268
r_app/40_mosaic_creation_per_field_utils.R
Normal file
|
|
@ -0,0 +1,268 @@
|
||||||
|
# MOSAIC_CREATION_PER_FIELD_UTILS.R
|
||||||
|
# ==================================
|
||||||
|
# Utility functions for creating per-field weekly mosaics from per-field daily TIFFs.
|
||||||
|
#
|
||||||
|
# This module aggregates daily per-field 5-band TIFFs (R,G,B,NIR,CI from Script 20)
|
||||||
|
# into weekly per-field mosaics using MAX compositing.
|
||||||
|
#
|
||||||
|
# DATA FLOW:
|
||||||
|
# Script 20: field_tiles_CI/{FIELD}/{DATE}.tif (5-band daily, per-field)
|
||||||
|
# ↓
|
||||||
|
# Script 40 NEW (this module):
|
||||||
|
# For each field:
|
||||||
|
# For each week:
|
||||||
|
# - Find all daily TIFFs for that week
|
||||||
|
# - Stack & create MAX composite
|
||||||
|
# - Save: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
|
||||||
|
# ↓
|
||||||
|
# Scripts 90/91: Read weekly_mosaic/{FIELD}/week_WW_YYYY.tif (unchanged interface)
|
||||||
|
|
||||||
|
#' Generate date range for processing (ISO week-based)
|
||||||
|
#'
|
||||||
|
#' @param end_date The end date (Date object or YYYY-MM-DD string)
|
||||||
|
#' @param offset Number of days to look back from end_date (typically 7 for one week)
|
||||||
|
#' @return List with week, year, start_date, end_date, days_filter (vector of YYYY-MM-DD strings)
|
||||||
|
#'
|
||||||
|
date_list <- function(end_date, offset) {
|
||||||
|
if (!lubridate::is.Date(end_date)) {
|
||||||
|
end_date <- as.Date(end_date)
|
||||||
|
if (is.na(end_date)) {
|
||||||
|
stop("Invalid end_date. Expected Date object or YYYY-MM-DD string.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offset <- as.numeric(offset)
|
||||||
|
if (is.na(offset) || offset < 1) {
|
||||||
|
stop("Invalid offset. Expected positive number.")
|
||||||
|
}
|
||||||
|
|
||||||
|
offset <- offset - 1 # Adjust to include end_date
|
||||||
|
start_date <- end_date - lubridate::days(offset)
|
||||||
|
|
||||||
|
week <- lubridate::isoweek(end_date)
|
||||||
|
year <- lubridate::isoyear(end_date)
|
||||||
|
|
||||||
|
# Validate: Check that all dates in range belong to same ISO week
|
||||||
|
start_week <- lubridate::isoweek(start_date)
|
||||||
|
start_year <- lubridate::isoyear(start_date)
|
||||||
|
|
||||||
|
if (start_week != week || start_year != year) {
|
||||||
|
safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. Using END date week %d/%d.",
|
||||||
|
start_week, start_year, week, year, week, year), "WARNING")
|
||||||
|
}
|
||||||
|
|
||||||
|
days_filter <- seq(from = start_date, to = end_date, by = "day")
|
||||||
|
days_filter <- format(days_filter, "%Y-%m-%d")
|
||||||
|
|
||||||
|
safe_log(paste("Date range:", start_date, "to", end_date,
|
||||||
|
"(week", week, "of", year, ")"))
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
week = week,
|
||||||
|
year = year,
|
||||||
|
start_date = start_date,
|
||||||
|
end_date = end_date,
|
||||||
|
days_filter = days_filter
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Find all per-field daily TIFFs for a specific week
|
||||||
|
#'
|
||||||
|
#' @param field_tiles_ci_dir Base directory containing per-field TIFFs
|
||||||
|
#' (e.g., field_tiles_CI/)
|
||||||
|
#' @param days_filter Vector of YYYY-MM-DD dates to match
|
||||||
|
#' @return List with field names and their matching TIFF files for the week
|
||||||
|
#'
|
||||||
|
find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) {
|
||||||
|
|
||||||
|
if (!dir.exists(field_tiles_ci_dir)) {
|
||||||
|
safe_log(paste("Field TIFFs directory not found:", field_tiles_ci_dir), "WARNING")
|
||||||
|
return(list())
|
||||||
|
}
|
||||||
|
|
||||||
|
# List all field subdirectories
|
||||||
|
field_dirs <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE)
|
||||||
|
|
||||||
|
if (length(field_dirs) == 0) {
|
||||||
|
safe_log("No field subdirectories found in field_tiles_CI/", "WARNING")
|
||||||
|
return(list())
|
||||||
|
}
|
||||||
|
|
||||||
|
# For each field, find TIFF files matching the week's dates
|
||||||
|
field_tiffs <- list()
|
||||||
|
|
||||||
|
for (field in field_dirs) {
|
||||||
|
field_path <- file.path(field_tiles_ci_dir, field)
|
||||||
|
|
||||||
|
# Find all TIFF files in this field directory
|
||||||
|
tiff_files <- list.files(field_path, pattern = "\\.tif$", full.names = TRUE)
|
||||||
|
|
||||||
|
if (length(tiff_files) == 0) {
|
||||||
|
next # Skip fields with no TIFFs
|
||||||
|
}
|
||||||
|
|
||||||
|
# Filter to only those matching week's dates
|
||||||
|
matching_files <- tiff_files[grepl(paste(days_filter, collapse = "|"), tiff_files)]
|
||||||
|
|
||||||
|
if (length(matching_files) > 0) {
|
||||||
|
field_tiffs[[field]] <- sort(matching_files)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(paste("Found TIFFs for", length(field_tiffs), "fields in week"))
|
||||||
|
|
||||||
|
return(field_tiffs)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Create weekly MAX composite for a single field
|
||||||
|
#'
|
||||||
|
#' Loads all daily TIFFs for a field+week combination and creates a MAX composite
|
||||||
|
#' (per-band maximum across all days).
|
||||||
|
#'
|
||||||
|
#' @param tiff_files Vector of TIFF file paths for this field+week
|
||||||
|
#' @param field_name Name of the field (for logging)
|
||||||
|
#' @return SpatRaster with 5 bands (R,G,B,NIR,CI), or NULL if fails
|
||||||
|
#'
|
||||||
|
create_field_weekly_composite <- function(tiff_files, field_name) {
|
||||||
|
|
||||||
|
if (length(tiff_files) == 0) {
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Load all TIFFs
|
||||||
|
rasters <- list()
|
||||||
|
for (file in tiff_files) {
|
||||||
|
tryCatch({
|
||||||
|
r <- terra::rast(file)
|
||||||
|
rasters[[length(rasters) + 1]] <- r
|
||||||
|
}, error = function(e) {
|
||||||
|
# Silently skip load errors (they're already counted)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length(rasters) == 0) {
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create MAX composite
|
||||||
|
if (length(rasters) == 1) {
|
||||||
|
composite <- rasters[[1]]
|
||||||
|
} else {
|
||||||
|
# Stack all rasters and apply MAX per pixel per band
|
||||||
|
collection <- terra::sprc(rasters)
|
||||||
|
composite <- terra::mosaic(collection, fun = "max")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Ensure 5 bands with expected names
|
||||||
|
if (terra::nlyr(composite) >= 5) {
|
||||||
|
composite <- terra::subset(composite, 1:5)
|
||||||
|
names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return(composite)
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error creating composite for field", field_name, ":", e$message), "ERROR")
|
||||||
|
return(NULL)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Save per-field weekly mosaic
|
||||||
|
#'
|
||||||
|
#' @param raster SpatRaster to save
|
||||||
|
#' @param output_dir Base output directory (e.g., laravel_app/storage/app/{project}/weekly_mosaic/)
|
||||||
|
#' @param field_name Name of the field
|
||||||
|
#' @param week Week number (ISO week)
|
||||||
|
#' @param year Year (ISO year)
|
||||||
|
#' @return File path of saved TIFF, or NULL if fails
|
||||||
|
#'
|
||||||
|
save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year) {
|
||||||
|
|
||||||
|
if (is.null(raster)) {
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Create field-specific output directory
|
||||||
|
field_output_dir <- file.path(output_dir, field_name)
|
||||||
|
dir.create(field_output_dir, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
# Generate filename: week_WW_YYYY.tif
|
||||||
|
filename <- sprintf("week_%02d_%04d.tif", week, year)
|
||||||
|
file_path <- file.path(field_output_dir, filename)
|
||||||
|
|
||||||
|
# Save raster (silently)
|
||||||
|
terra::writeRaster(raster, file_path, overwrite = TRUE)
|
||||||
|
|
||||||
|
return(file_path)
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error saving mosaic for field", field_name, ":", e$message), "ERROR")
|
||||||
|
return(NULL)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Create all weekly mosaics for all fields in a week
|
||||||
|
#'
|
||||||
|
#' Main orchestration function. Loops over all fields and creates weekly mosaics.
|
||||||
|
#'
|
||||||
|
#' @param dates List from date_list() - contains week, year, days_filter
|
||||||
|
#' @param field_tiles_ci_dir Input: field_tiles_CI/ directory
|
||||||
|
#' @param output_dir Output: weekly_mosaic/ directory
|
||||||
|
#' @return Vector of successfully created file paths
|
||||||
|
#'
|
||||||
|
create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_dir) {
|
||||||
|
|
||||||
|
safe_log(paste("=== Creating Per-Field Weekly Mosaics ==="))
|
||||||
|
safe_log(paste("Week:", dates$week, "Year:", dates$year))
|
||||||
|
|
||||||
|
# Find all per-field TIFFs for this week
|
||||||
|
field_tiffs <- find_per_field_tiffs_for_week(field_tiles_ci_dir, dates$days_filter)
|
||||||
|
|
||||||
|
if (length(field_tiffs) == 0) {
|
||||||
|
safe_log("No per-field TIFFs found for this week - returning empty list", "WARNING")
|
||||||
|
return(character())
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(paste("Processing", length(field_tiffs), "fields..."))
|
||||||
|
|
||||||
|
created_files <- character()
|
||||||
|
|
||||||
|
# Initialize progress bar
|
||||||
|
pb <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50)
|
||||||
|
counter <- 0
|
||||||
|
|
||||||
|
# Process each field
|
||||||
|
for (field_name in names(field_tiffs)) {
|
||||||
|
counter <- counter + 1
|
||||||
|
tiff_files <- field_tiffs[[field_name]]
|
||||||
|
|
||||||
|
# Create composite
|
||||||
|
composite <- create_field_weekly_composite(tiff_files, field_name)
|
||||||
|
|
||||||
|
if (!is.null(composite)) {
|
||||||
|
# Save
|
||||||
|
saved_path <- save_field_weekly_mosaic(
|
||||||
|
composite,
|
||||||
|
output_dir,
|
||||||
|
field_name,
|
||||||
|
dates$week,
|
||||||
|
dates$year
|
||||||
|
)
|
||||||
|
|
||||||
|
if (!is.null(saved_path)) {
|
||||||
|
created_files <- c(created_files, saved_path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setTxtProgressBar(pb, counter)
|
||||||
|
}
|
||||||
|
|
||||||
|
close(pb)
|
||||||
|
cat("\n") # New line after progress bar
|
||||||
|
safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))
|
||||||
|
|
||||||
|
return(created_files)
|
||||||
|
}
|
||||||
|
|
@ -1,286 +0,0 @@
|
||||||
# MOSAIC_CREATION_TILE_UTILS.R
|
|
||||||
# ============================
|
|
||||||
# Tile-based processing utilities for scalable weekly mosaic creation
|
|
||||||
#
|
|
||||||
# STRATEGY: Memory-efficient, scalable approach for large study areas
|
|
||||||
# Split daily full-extent mosaics into fixed-size tiles (e.g., 5×5 km),
|
|
||||||
# then process each tile position across all days, and reassemble.
|
|
||||||
#
|
|
||||||
# WORKFLOW:
|
|
||||||
# Daily full-extent TIFF
|
|
||||||
# ↓
|
|
||||||
# Split into N×M tiles (based on area size / tile_size)
|
|
||||||
# E.g., 50×80 km area with 5 km tiles = 10×16 = 160 tiles
|
|
||||||
# ↓
|
|
||||||
# For EACH TILE POSITION (1 to 160):
|
|
||||||
# - Load that tile from all 7 days
|
|
||||||
# - Create MAX composite for that tile
|
|
||||||
# - Save tile_NNN_MAX.tif
|
|
||||||
# ↓
|
|
||||||
# Reassemble all 160 MAX tiles back into full-extent
|
|
||||||
# ↓
|
|
||||||
# Save weekly full-extent mosaic
|
|
||||||
#
|
|
||||||
# KEY BENEFIT: Memory usage ~50 MB per tile (5×5 km), not 1.3 GB (full 50×80 km)
|
|
||||||
# Scales automatically: bigger area = more tiles, same memory per tile
|
|
||||||
#
|
|
||||||
# TILE_SIZE: Configurable (default 5000 m = 5 km, adjustable via parameter)
|
|
||||||
|
|
||||||
#' Simple tile-based processing using terra::makeTiles()
|
|
||||||
#'
|
|
||||||
#' Calculates tile grid based on desired tile SIZE (e.g., 5 km), not grid count.
|
|
||||||
#' This makes it SCALABLE: bigger area = more tiles automatically.
|
|
||||||
#'
|
|
||||||
#' @param raster_path Path to raster to split
|
|
||||||
#' @param output_dir Directory to save tiles
|
|
||||||
#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km)
|
|
||||||
#' @return List with tiles (file paths) and metadata
|
|
||||||
#'
|
|
||||||
#' Split a raster into fixed-size tiles with terra::makeTiles()
#'
#' The tile grid is derived from the desired tile SIZE (not a fixed grid
#' count), so a larger raster automatically yields more tiles.
#'
#' NOTE(review): tile_size_m is applied directly to the extent coordinates,
#' which assumes a projected CRS in meters — unlike create_weekly_mosaic_tiled,
#' no degrees conversion is done here. Confirm inputs are not lat/lon.
#'
#' @param raster_path Path to the raster to split
#' @param output_dir Directory where the tile files are written
#' @param tile_size_m Tile edge length in meters (default: 5000 m = 5 km)
#' @return List with tiles (file paths), tile counts, extent and source path
#'
split_raster_into_tiles <- function(raster_path, output_dir, tile_size_m = 5000) {
  source_raster <- terra::rast(raster_path)

  # Extent span along each axis, in CRS units
  full_extent <- terra::ext(source_raster)
  span_x <- full_extent$xmax - full_extent$xmin
  span_y <- full_extent$ymax - full_extent$ymin

  n_tiles_x <- ceiling(span_x / tile_size_m)
  n_tiles_y <- ceiling(span_y / tile_size_m)

  safe_log(paste("Splitting into", n_tiles_x, "×", n_tiles_y, "tiles of",
                 tile_size_m / 1000, "km"))

  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

  # terra::makeTiles() writes one file per tile; n is passed as c(rows, cols)
  tiles <- terra::makeTiles(
    source_raster,
    n = c(n_tiles_y, n_tiles_x), # rows, cols
    filename = file.path(output_dir, "tile_.tif"),
    overwrite = TRUE
  )

  safe_log(paste("Created", length(tiles), "tile files"))

  return(list(
    tiles = tiles,
    n_tiles = length(tiles),
    n_tiles_x = n_tiles_x,
    n_tiles_y = n_tiles_y,
    extent = terra::ext(source_raster),
    raster = raster_path
  ))
}
|
|
||||||
|
|
||||||
|
|
||||||
#' Create weekly MAX mosaic using TRUE tile-based processing
|
|
||||||
#'
|
|
||||||
#' TILE-BASED WORKFLOW (Memory efficient):
|
|
||||||
#' 1. Calculate tile grid dynamically from extent and FIXED tile_size (e.g., 5 km)
|
|
||||||
#' 2. For EACH TILE across all 7 days:
|
|
||||||
#' - Load that tile from each daily file (small ~50 MB, not 1.3 GB)
|
|
||||||
#' - Create MAX composite for just that tile
|
|
||||||
#' 3. Reassemble all tiles into final full-extent mosaic
|
|
||||||
#'
|
|
||||||
#' SCALABILITY: Fixed 5 km tile size means bigger area = more tiles (automatic scaling)
|
|
||||||
#'
|
|
||||||
#' @param dates List from date_list() with days_filter
|
|
||||||
#' @param merged_final_dir Directory with daily merged full-extent TIFFs
|
|
||||||
#' @param final_output_dir Directory to store final reassembled mosaic
|
|
||||||
#' @param file_name_tif Output filename for final mosaic
|
|
||||||
#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km). Bigger area automatically gets more tiles.
|
|
||||||
#' @return Path to final reassembled weekly mosaic
|
|
||||||
#'
|
|
||||||
#' Create weekly MAX mosaic using TRUE tile-based processing
#'
#' TILE-BASED WORKFLOW (memory efficient):
#'  1. Calculate the tile grid dynamically from the raster extent and a FIXED
#'     tile size (default 5 km).
#'  2. For EACH tile position, load just that window from every daily file
#'     (small crop, not the full raster) and build a MAX composite for it.
#'  3. Reassemble all tile composites into the final full-extent mosaic.
#'
#' SCALABILITY: the fixed tile size means a bigger study area simply produces
#' more tiles; per-tile memory stays roughly constant.
#'
#' @param dates List from date_list() with days_filter
#' @param merged_final_dir Directory with daily merged full-extent TIFFs
#' @param final_output_dir Directory to store the final reassembled mosaic
#' @param file_name_tif Output filename for the final mosaic
#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km)
#' @return Path to the final weekly mosaic, or NULL on failure
#'
create_weekly_mosaic_tiled <- function(dates, merged_final_dir,
                                       final_output_dir, file_name_tif,
                                       tile_size_m = 5000) {

  # --- Locate this week's daily files -------------------------------------
  daily_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")
  daily_files <- daily_files[grepl(paste(dates$days_filter, collapse = "|"), daily_files)]

  if (length(daily_files) == 0) {
    safe_log("No daily files found for this week", "ERROR")
    return(NULL)
  }

  safe_log(paste("Found", length(daily_files), "daily files for week", dates$week))

  # --- Establish the tile grid from the first daily file ------------------
  safe_log("Step 1: Loading first daily file to establish tile structure")
  first_raster <- terra::rast(daily_files[1])
  raster_crs <- terra::crs(first_raster)

  # Geographic (lat/lon) rasters need the tile size expressed in degrees:
  # 1 degree of latitude ≈ 111 km, so 5000 m ≈ 0.045°.
  is_geographic <- grepl("GEOGCRS|longlat|geographic|ANGLEUNIT.*degree",
                         as.character(raster_crs), ignore.case = TRUE)

  if (is_geographic) {
    tile_size_degrees <- tile_size_m / 111000 # 111 km per degree
    safe_log(paste("Raster is in geographic CRS (lat/lon). Converting", tile_size_m / 1000,
                   "km to", round(tile_size_degrees, 4), "degrees"))
  } else {
    tile_size_degrees <- tile_size_m # projected CRS: already in meters
    safe_log(paste("Raster is in projected CRS. Using", tile_size_m / 1000, "km directly"))
  }

  # Tile counts derive from the extent span and the (converted) tile size
  extent <- terra::ext(first_raster)
  x_range <- extent$xmax - extent$xmin
  y_range <- extent$ymax - extent$ymin

  n_tiles_x <- ceiling(x_range / tile_size_degrees)
  n_tiles_y <- ceiling(y_range / tile_size_degrees)
  n_tiles_total <- n_tiles_x * n_tiles_y

  safe_log(paste("Step 2: Creating tile grid:", tile_size_m / 1000, "km tiles"))
  safe_log(paste(" Extent:", round(x_range, 4), "° ×", round(y_range, 4), "°"))
  safe_log(paste(" Grid:", n_tiles_y, "rows ×", n_tiles_x, "columns =", n_tiles_total, "tiles"))

  # Compute each tile's extent mathematically (no temp files needed)
  x_min <- extent$xmin
  y_min <- extent$ymin

  tile_extents <- list()
  tile_idx <- 0
  for (row in 1:n_tiles_y) {
    for (col in 1:n_tiles_x) {
      tile_idx <- tile_idx + 1
      tile_xmin <- x_min + (col - 1) * tile_size_degrees
      tile_xmax <- min(tile_xmin + tile_size_degrees, extent$xmax)
      tile_ymin <- y_min + (row - 1) * tile_size_degrees
      tile_ymax <- min(tile_ymin + tile_size_degrees, extent$ymax)
      tile_extents[[tile_idx]] <- terra::ext(tile_xmin, tile_xmax, tile_ymin, tile_ymax)
    }
  }

  safe_log(paste("Calculated", length(tile_extents), "tile extents"))

  # Intermediate tiles are written to disk to keep memory use low
  tile_storage_dir <- file.path("laravel_app", "storage", "app", "angata", "daily_tiles")
  dir.create(tile_storage_dir, recursive = TRUE, showWarnings = FALSE)

  # --- Per-tile MAX composites --------------------------------------------
  safe_log("Step 3: Processing tiles (load per-tile across all days, create MAX for each)")
  tile_files <- character() # only successfully written tiles (no NULL holes)

  for (tile_idx in seq_along(tile_extents)) {
    if (tile_idx %% max(1, ceiling(n_tiles_total / 10)) == 0 || tile_idx == 1) {
      safe_log(paste(" Processing tile", tile_idx, "of", n_tiles_total))
    }

    tile_extent <- tile_extents[[tile_idx]]

    # Crop this tile's window from every daily file. Append on success so a
    # failed crop cannot leave NULL holes in the list (BUG FIX: indexed
    # assignment previously left NULL entries that broke terra::sprc()).
    daily_tile_data <- list()
    for (file_idx in seq_along(daily_files)) {
      tryCatch({
        r <- terra::rast(daily_files[file_idx])
        cropped <- terra::crop(r, tile_extent, snap = "in")
        daily_tile_data[[length(daily_tile_data) + 1]] <- cropped
      }, error = function(e) {
        safe_log(paste("Warning: Could not crop tile", tile_idx, "from",
                       basename(daily_files[file_idx])), "WARNING")
      })
    }

    if (length(daily_tile_data) == 0) {
      safe_log(paste("No valid data for tile", tile_idx), "WARNING")
      next
    }

    # MAX composite across days: picks the clearest pixel per location
    if (length(daily_tile_data) == 1) {
      tile_max <- daily_tile_data[[1]]
    } else {
      collection <- terra::sprc(daily_tile_data)
      tile_max <- terra::mosaic(collection, fun = "max")
    }

    # Save the composite tile to disk (keeps memory low)
    tile_file <- file.path(tile_storage_dir, sprintf("tile_%04d.tif", tile_idx))
    terra::writeRaster(tile_max, tile_file, overwrite = TRUE)
    # BUG FIX: append instead of tile_files_list[[tile_idx]] <- ..., which
    # left NULL holes for skipped tiles and broke the reassembly lapply().
    tile_files <- c(tile_files, tile_file)
  }

  if (length(tile_files) == 0) {
    safe_log("No tiles processed successfully", "ERROR")
    return(NULL)
  }

  # --- Reassemble ---------------------------------------------------------
  safe_log(paste("Step 4: Reassembling", length(tile_files), "tiles from disk into full-extent mosaic"))

  tile_rasters <- lapply(tile_files, terra::rast)
  collection <- terra::sprc(tile_rasters)
  final_mosaic <- terra::mosaic(collection, fun = "first")

  safe_log("Step 5: Saving final reassembled mosaic")

  dir.create(final_output_dir, recursive = TRUE, showWarnings = FALSE)
  final_file_path <- file.path(final_output_dir, file_name_tif)

  # BUG FIX: the previous error handler's return(NULL) only exited the
  # handler function, so a failed save still returned the file path.
  # Track success explicitly instead.
  save_ok <- tryCatch({
    terra::writeRaster(final_mosaic, final_file_path, overwrite = TRUE)
    safe_log(paste("✓ Weekly mosaic saved to:", final_file_path))
    TRUE
  }, error = function(e) {
    safe_log(paste("Error saving mosaic:", e$message), "ERROR")
    FALSE
  })

  # Cleanup temporary tile files (as before, even when the save failed)
  unlink(tile_storage_dir, recursive = TRUE)
  safe_log("Cleaned up temporary tile files")

  if (!save_ok) {
    return(NULL)
  }

  return(final_file_path)
}
|
|
||||||
|
|
||||||
#' Load tile MAX rasters for a specific week (for per-tile analysis)
|
|
||||||
#'
|
|
||||||
#' @param week Week number
|
|
||||||
#' @param tile_dir Directory containing tile mosaics (week subdirectories)
|
|
||||||
#' @return List of tile rasters with tile_id metadata
|
|
||||||
#'
|
|
||||||
#' Load all tile MAX rasters saved for a given week
#'
#' @param week Week number (used to resolve the week_<N> subdirectory)
#' @param tile_dir Parent directory holding per-week tile subdirectories
#' @return Named list of tile rasters ordered by tile id, or NULL when missing
#'
load_tile_mosaics <- function(week, tile_dir) {
  week_dir <- file.path(tile_dir, paste0("week_", week))

  if (!dir.exists(week_dir)) {
    warning(paste("Tile directory not found:", week_dir))
    return(NULL)
  }

  # Collect every tile file for this week
  tile_files <- list.files(week_dir, pattern = "^tile_.*\\.tif$", full.names = TRUE)

  if (length(tile_files) == 0) {
    warning("No tile files found in:", week_dir)
    return(NULL)
  }

  # Order the files by the numeric tile id embedded in each filename
  tile_numbers <- as.numeric(gsub(".*tile_(\\d+).*", "\\1", tile_files))
  ordered_files <- tile_files[order(tile_numbers)]

  # Load each raster and key the list by zero-padded tile id
  tiles_list <- lapply(ordered_files, terra::rast)
  names(tiles_list) <- sprintf("tile_%03d", sort(tile_numbers))

  safe_log(paste("Loaded", length(tiles_list), "tile mosaics for week", week))

  return(tiles_list)
}
|
|
||||||
|
|
@ -1,827 +0,0 @@
|
||||||
# MOSAIC_CREATION_UTILS.R
|
|
||||||
# ======================
|
|
||||||
# Utility functions for creating weekly mosaics from daily satellite imagery.
|
|
||||||
# These functions support cloud cover assessment, date handling, and mosaic creation.
|
|
||||||
|
|
||||||
#' Detect whether a project uses tile-based or single-file mosaic approach
|
|
||||||
#'
|
|
||||||
#' @param merged_final_tif_dir Directory containing merged_final_tif files
|
|
||||||
#' @return List with has_tiles (logical), detected_tiles (vector), total_files (count)
|
|
||||||
#'
|
|
||||||
#' Detect whether a project uses tile-based or single-file mosaics
#'
#' Inspects the merged_final_tif directory and reports whether any file
#' follows the two-digit tile suffix convention (e.g. 2025-11-27_00.tif).
#'
#' @param merged_final_tif_dir Directory containing merged_final_tif files
#' @return List with has_tiles (logical), detected_tiles (character), total_files (count)
#'
detect_mosaic_mode <- function(merged_final_tif_dir) {
  empty_result <- list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)

  # Missing directory: nothing to inspect
  if (!dir.exists(merged_final_tif_dir)) {
    return(empty_result)
  }

  # All .tif files, names only
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)

  if (length(tif_files) == 0) {
    return(empty_result)
  }

  # Tile naming convention: *_XX.tif where XX is exactly two digits
  # (examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif)
  tile_files <- tif_files[grepl("_(\\d{2})\\.tif$", tif_files)]

  return(list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files)
  ))
}
|
|
||||||
|
|
||||||
#' Safe logging function
|
|
||||||
#' @param message The message to log
|
|
||||||
#' @param level The log level (default: "INFO")
|
|
||||||
#' @return NULL (used for side effects)
|
|
||||||
#'
|
|
||||||
#' Safe logging helper
#'
#' Routes to the project logger (log_message) when it is loaded; otherwise
#' falls back to base R conditions: warning() for ERROR/WARNING levels,
#' message() for everything else.
#'
#' @param message The text to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  if (exists("log_message")) {
    # Project logger is available — delegate to it
    log_message(message, level)
  } else if (level %in% c("ERROR", "WARNING")) {
    warning(message)
  } else {
    message(message)
  }
}
|
|
||||||
|
|
||||||
#' Generate a sequence of dates for processing
|
|
||||||
#'
|
|
||||||
#' @param end_date The end date for the sequence (Date object)
|
|
||||||
#' @param offset Number of days to look back from end_date
|
|
||||||
#' @return A list containing week number, year, and a sequence of dates for filtering
|
|
||||||
#'
|
|
||||||
#' Build the date window and week metadata for a processing run
#'
#' @param end_date Last date of the window (Date or string coercible to Date)
#' @param offset Number of days in the window, counting end_date itself
#' @return List with week, year, days_filter (character dates), start_date, end_date
#'
date_list <- function(end_date, offset) {
  # Coerce and validate the end date
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }

  # Validate the window length
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }

  # The window includes end_date itself, so look back offset - 1 days
  lookback_days <- offset - 1
  start_date <- end_date - lubridate::days(lookback_days)

  # ISO week/year of the window's final day
  iso_week <- lubridate::isoweek(end_date)
  iso_year <- lubridate::isoyear(end_date)

  # One formatted string per day; used downstream for filename filtering
  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")

  safe_log(paste("Date range generated from", start_date, "to", end_date))

  return(list(
    "week" = iso_week,
    "year" = iso_year,
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  ))
}
|
|
||||||
|
|
||||||
#' Create a weekly mosaic from available VRT files
|
|
||||||
#'
|
|
||||||
#' @param dates List from date_list() with date range info
|
|
||||||
#' @param field_boundaries Field boundaries for image cropping
|
|
||||||
#' @param daily_vrt_dir Directory containing VRT files
|
|
||||||
#' @param merged_final_dir Directory with merged final rasters
|
|
||||||
#' @param output_dir Output directory for weekly mosaics
|
|
||||||
#' @param file_name_tif Output filename for the mosaic
|
|
||||||
#' @param create_plots Whether to create visualization plots (default: FALSE)
|
|
||||||
#' @return The file path of the saved mosaic
|
|
||||||
#'
|
|
||||||
#' Create a weekly mosaic from available VRT files
#'
#' When VRTs exist for the window, the best daily images (by cloud cover)
#' are composited; otherwise an all-NA placeholder mosaic is produced on the
#' grid of the first available final raster.
#'
#' @param dates List from date_list() with date range info
#' @param field_boundaries Field boundaries for image cropping
#' @param daily_vrt_dir Directory containing VRT files
#' @param merged_final_dir Directory with merged final rasters
#' @param output_dir Output directory for weekly mosaics
#' @param file_name_tif Output filename for the mosaic
#' @param create_plots Whether to create visualization plots (default: FALSE)
#' @return The file path of the saved mosaic
#'
create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir,
                                 merged_final_dir, output_dir, file_name_tif,
                                 create_plots = FALSE) {
  # Candidate daily VRTs inside the requested window
  vrt_list <- find_vrt_files(daily_vrt_dir, dates)

  # Final daily rasters: used for compositing, and as a fallback template
  raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")

  if (length(vrt_list) > 0) {
    safe_log("VRT list created, assessing cloud cover for mosaic creation")

    # Rank daily images by cloud coverage, then composite the best ones
    cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries)
    mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries)
  } else {
    safe_log("No VRT files available for the date range, creating empty mosaic with NA values", "WARNING")

    if (length(raster_files_final) == 0) {
      stop("No VRT files or final raster files available to create mosaic")
    }

    # All-NA placeholder built on the grid of the first available raster
    template <- terra::rast(raster_files_final[1])
    template <- terra::setValues(template, NA)
    mosaic <- terra::crop(template, field_boundaries, mask = TRUE)
    names(mosaic) <- c("Red", "Green", "Blue", "NIR", "CI")
  }

  # Persist the mosaic (mask files disabled to avoid breaking other scripts)
  file_path <- save_mosaic(mosaic, output_dir, file_name_tif, create_plots, save_mask = FALSE)

  safe_log(paste("Weekly mosaic processing completed for week", dates$week))

  return(file_path)
}
|
|
||||||
|
|
||||||
#' Find VRT files within a date range
|
|
||||||
#'
|
|
||||||
#' @param vrt_directory Directory containing VRT files
|
|
||||||
#' @param dates List from date_list() function containing days_filter
|
|
||||||
#' @return Character vector of VRT file paths
|
|
||||||
#'
|
|
||||||
#' Find VRT files whose names match a window of dates
#'
#' @param vrt_directory Directory containing VRT files
#' @param dates List from date_list() containing days_filter
#' @return Character vector of matching VRT file paths (empty when none)
#'
find_vrt_files <- function(vrt_directory, dates) {
  all_vrts <- list.files(here::here(vrt_directory), full.names = TRUE)

  if (length(all_vrts) == 0) {
    warning("No VRT files found in directory: ", vrt_directory)
    return(character(0))
  }

  # Collect matches day by day so the result stays ordered by date
  matches_by_day <- purrr::map(dates$days_filter,
                               function(day) all_vrts[grepl(pattern = day, x = all_vrts)])
  vrt_list <- purrr::flatten_chr(purrr::compact(matches_by_day))

  safe_log(paste("Found", length(vrt_list), "VRT files for the date range"))

  return(vrt_list)
}
|
|
||||||
|
|
||||||
#' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files
|
|
||||||
#'
|
|
||||||
#' @param vrt_list List of VRT file paths (used to extract dates for TIF file lookup)
|
|
||||||
#' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif)
|
|
||||||
#' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only
|
|
||||||
#' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection)
|
|
||||||
#'
|
|
||||||
count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) {
  # Nothing to assess without input VRTs
  if (length(vrt_list) == 0) {
    warning("No VRT files provided for cloud coverage calculation")
    return(NULL)
  }

  # Outer tryCatch guards the whole assessment; any unexpected failure
  # degrades to a warning + NULL return rather than aborting the pipeline.
  tryCatch({
    # Extract dates from VRT filenames to find corresponding TIF files
    # VRT filenames are like "merged2025-12-18.vrt", TIF filenames are like "2025-12-18.tif"
    tif_dates <- gsub(".*([0-9]{4}-[0-9]{2}-[0-9]{2}).*", "\\1", basename(vrt_list))

    # Build list of actual TIF files to use
    tif_files <- paste0(here::here(merged_final_dir), "/", tif_dates, ".tif")

    # Check which TIF files exist; bail out if none do
    tif_exist <- file.exists(tif_files)
    if (!any(tif_exist)) {
      warning("No TIF files found in directory: ", merged_final_dir)
      return(NULL)
    }

    tif_files <- tif_files[tif_exist]
    safe_log(paste("Found", length(tif_files), "TIF files for cloud coverage assessment"))

    # One data.frame row per TIF, accumulated here and rbind-ed at the end
    aggregated_results <- list()

    # Process each TIF file independently; a failure on one file yields a
    # sentinel row (100% missing) instead of aborting the loop.
    for (tif_idx in seq_along(tif_files)) {
      tif_file <- tif_files[tif_idx]

      tryCatch({
        # Load the TIF file (typically has 5 bands: R, G, B, NIR, CI)
        current_raster <- terra::rast(tif_file)

        # Extract the CI band (last band); cloudy pixels are NA in this band
        ci_band <- current_raster[[terra::nlyr(current_raster)]]

        # NA sentinels: remain NA unless the field-mask path below succeeds,
        # which triggers the whole-raster fallback further down
        total_notna <- NA_real_
        total_pixels <- NA_real_

        if (!is.null(field_boundaries)) {
          tryCatch({
            # Create a mask specific to THIS raster's grid — rasters can
            # have different extents due to missing data
            field_mask <- terra::rasterize(field_boundaries, ci_band, field = 1)

            # Count pixels within field boundaries (for this specific raster)
            total_pixels <- terra::global(field_mask, fun = "notNA")$notNA

            # Cloud coverage calculated only over field areas
            ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA)
            total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA

          }, error = function(e) {
            # If field mask creation fails, the NA sentinels above stay set
            # and the whole-raster fallback below takes over
            safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING")
          })
        }

        # Fallback: no field boundaries, or masking failed — use the entire raster
        if (is.na(total_notna)) {
          total_notna <- terra::global(ci_band, fun = "notNA")$notNA
          total_pixels <- terra::ncell(ci_band)
        }

        # Calculate cloud coverage percentage (missing = clouds)
        missing_pct <- round(100 - ((total_notna / total_pixels) * 100))

        # NOTE(review): filename holds the FULL path (not basename); any
        # downstream matching against basenames must account for this.
        # NOTE(review): thres_40perc uses a < 45 cutoff despite its name —
        # confirm the 5-point slack is intentional.
        aggregated_results[[tif_idx]] <- data.frame(
          filename = tif_file,
          notNA = total_notna,
          total_pixels = total_pixels,
          missing_pixels_percentage = missing_pct,
          thres_5perc = as.integer(missing_pct < 5),
          thres_40perc = as.integer(missing_pct < 45),
          stringsAsFactors = FALSE
        )

      }, error = function(e) {
        safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING")
        # <<- is required here: the handler runs in its own frame, and the
        # sentinel row must land in the function's aggregated_results list
        aggregated_results[[tif_idx]] <<- data.frame(
          filename = tif_file,
          notNA = NA_real_,
          total_pixels = NA_real_,
          missing_pixels_percentage = 100,
          thres_5perc = 0,
          thres_40perc = 0,
          stringsAsFactors = FALSE
        )
      })
    }

    # Combine all per-file rows into one data frame (empty if none)
    aggregated_df <- if (length(aggregated_results) > 0) {
      do.call(rbind, aggregated_results)
    } else {
      data.frame()
    }

    # Log results
    safe_log(paste("Cloud coverage assessment completed for", length(vrt_list), "images"))

    # Return aggregated data only
    return(aggregated_df)

  }, error = function(e) {
    warning("Error in cloud coverage calculation: ", e$message)
    return(NULL)
  })
}
|
|
||||||
|
|
||||||
#' Create a mosaic from merged_final_tif files based on cloud coverage
|
|
||||||
#'
|
|
||||||
#' @param tif_files List of processed TIF files (5 bands: R, G, B, NIR, CI)
|
|
||||||
#' @param cloud_coverage_stats Cloud coverage statistics from count_cloud_coverage()
|
|
||||||
#' @param field_boundaries Field boundaries for masking (optional)
|
|
||||||
#' @return A SpatRaster object with 5 bands (Red, Green, Blue, NIR, CI)
|
|
||||||
#'
|
|
||||||
create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NULL) {
|
|
||||||
# If no TIF files, return NULL
|
|
||||||
if (length(tif_files) == 0) {
|
|
||||||
safe_log("No TIF files available for mosaic creation", "ERROR")
|
|
||||||
return(NULL)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Validate cloud coverage stats
|
|
||||||
mosaic_type <- "Unknown" # Track what type of mosaic is being created
|
|
||||||
|
|
||||||
if (is.null(cloud_coverage_stats) || nrow(cloud_coverage_stats) == 0) {
|
|
||||||
safe_log("No cloud coverage statistics available, using all files", "WARNING")
|
|
||||||
rasters_to_use <- tif_files
|
|
||||||
mosaic_type <- paste("all", length(tif_files), "available images")
|
|
||||||
} else {
|
|
||||||
# Determine best rasters to use based on cloud coverage thresholds
|
|
||||||
# Count how many images meet each threshold
|
|
||||||
num_5perc <- sum(cloud_coverage_stats$thres_5perc, na.rm = TRUE)
|
|
||||||
num_40perc <- sum(cloud_coverage_stats$thres_40perc, na.rm = TRUE)
|
|
||||||
|
|
||||||
if (num_5perc > 1) {
|
|
||||||
# Multiple images with <5% cloud coverage
|
|
||||||
safe_log(paste("Creating max composite from", num_5perc, "cloud-free images (<5% clouds)"))
|
|
||||||
mosaic_type <- paste(num_5perc, "cloud-free images (<5% clouds)")
|
|
||||||
best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)
|
|
||||||
|
|
||||||
} else if (num_5perc == 1) {
|
|
||||||
# Single image with <5% cloud coverage
|
|
||||||
safe_log("Using single cloud-free image (<5% clouds)")
|
|
||||||
mosaic_type <- "single cloud-free image (<5% clouds)"
|
|
||||||
best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)
|
|
||||||
|
|
||||||
} else if (num_40perc > 1) {
|
|
||||||
# Multiple images with <40% cloud coverage
|
|
||||||
safe_log(paste("Creating max composite from", num_40perc, "partially cloudy images (<40% clouds)"), "WARNING")
|
|
||||||
mosaic_type <- paste(num_40perc, "partially cloudy images (<40% clouds)")
|
|
||||||
best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)
|
|
||||||
|
|
||||||
} else if (num_40perc == 1) {
|
|
||||||
# Single image with <40% cloud coverage
|
|
||||||
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
|
|
||||||
mosaic_type <- "single partially cloudy image (<40% clouds)"
|
|
||||||
best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# No cloud-free images available
|
|
||||||
safe_log("No cloud-free images available, using all images", "WARNING")
|
|
||||||
mosaic_type <- paste("all", nrow(cloud_coverage_stats), "available images")
|
|
||||||
best_coverage <- seq_len(nrow(cloud_coverage_stats))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get filenames of best-coverage images
|
|
||||||
# Match by full filename from cloud stats to TIF files
|
|
||||||
rasters_to_use <- character()
|
|
||||||
for (idx in best_coverage) {
|
|
||||||
# Get the full filename from cloud coverage stats
|
|
||||||
cc_filename <- cloud_coverage_stats$filename[idx]
|
|
||||||
|
|
||||||
# Find matching TIF file by full filename
|
|
||||||
matching_tif <- NULL
|
|
||||||
for (tif_file in tif_files) {
|
|
||||||
tif_basename <- basename(tif_file)
|
|
||||||
if (tif_basename == cc_filename) {
|
|
||||||
matching_tif <- tif_file
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!is.null(matching_tif)) {
|
|
||||||
rasters_to_use <- c(rasters_to_use, matching_tif)
|
|
||||||
} else {
|
|
||||||
safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", cc_filename), "WARNING")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(rasters_to_use) == 0) {
|
|
||||||
safe_log("Could not match cloud coverage stats to TIF files, using all files", "WARNING")
|
|
||||||
rasters_to_use <- tif_files
|
|
||||||
mosaic_type <- paste("all", length(tif_files), "available images")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Load and mosaic the selected rasters
|
|
||||||
if (length(rasters_to_use) == 1) {
|
|
||||||
# Single file - just load it
|
|
||||||
safe_log(paste("Using single image for mosaic:", basename(rasters_to_use)))
|
|
||||||
mosaic <- terra::rast(rasters_to_use[1])
|
|
||||||
} else {
|
|
||||||
# Multiple files - merge handles different extents/grids automatically
|
|
||||||
safe_log(paste("Creating mosaic from", length(rasters_to_use), "images"))
|
|
||||||
|
|
||||||
# Load all rasters with error handling - only keep successful loads
|
|
||||||
all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) {
|
|
||||||
tryCatch({
|
|
||||||
terra::rast(f)
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING")
|
|
||||||
NULL # Return NULL on error, will be filtered out
|
|
||||||
})
|
|
||||||
}))
|
|
||||||
|
|
||||||
# Check what we loaded
|
|
||||||
safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files"))
|
|
||||||
|
|
||||||
if (length(all_rasters) == 0) {
|
|
||||||
safe_log("No valid rasters to merge", "WARNING")
|
|
||||||
return(NULL)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Merge all rasters (handles different extents and grids automatically)
|
|
||||||
if (length(all_rasters) == 1) {
|
|
||||||
mosaic <- all_rasters[[1]]
|
|
||||||
safe_log("Using single raster after filtering")
|
|
||||||
} else {
|
|
||||||
# Create max composite: take maximum value at each pixel across all dates
|
|
||||||
# This skips clouds (low/zero CI values) in favor of clear pixels from other dates
|
|
||||||
mosaic <- tryCatch({
|
|
||||||
safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds"))
|
|
||||||
|
|
||||||
# Check if all rasters have identical grids (extent and resolution)
|
|
||||||
# This is likely for per-tile mosaics from the same tiling scheme
|
|
||||||
reference_raster <- all_rasters[[1]]
|
|
||||||
ref_ext <- terra::ext(reference_raster)
|
|
||||||
ref_res <- terra::res(reference_raster)
|
|
||||||
|
|
||||||
grids_match <- all(sapply(all_rasters[-1], function(r) {
|
|
||||||
isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) &&
|
|
||||||
isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6))
|
|
||||||
}))
|
|
||||||
|
|
||||||
if (grids_match) {
|
|
||||||
# All rasters have matching grids - no cropping/resampling needed!
|
|
||||||
safe_log("All rasters have identical grids - stacking directly for max composite")
|
|
||||||
raster_collection <- terra::sprc(all_rasters)
|
|
||||||
max_mosaic <- terra::mosaic(raster_collection, fun = "max")
|
|
||||||
} else {
|
|
||||||
# Grids don't match - need to crop and resample
|
|
||||||
safe_log("Rasters have different grids - cropping and resampling to common extent")
|
|
||||||
|
|
||||||
# Get extent from field boundaries if available, otherwise use raster union
|
|
||||||
if (!is.null(field_boundaries)) {
|
|
||||||
crop_extent <- terra::ext(field_boundaries)
|
|
||||||
safe_log("Using field boundaries extent for consistent area across all dates")
|
|
||||||
} else {
|
|
||||||
# Use union of all extents (covers all data)
|
|
||||||
crop_extent <- terra::ext(all_rasters[[1]])
|
|
||||||
for (i in 2:length(all_rasters)) {
|
|
||||||
crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]]))
|
|
||||||
}
|
|
||||||
safe_log("Using raster union extent")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Crop all rasters to common extent
|
|
||||||
cropped_rasters <- lapply(all_rasters, function(r) {
|
|
||||||
terra::crop(r, crop_extent)
|
|
||||||
})
|
|
||||||
|
|
||||||
# Resample all cropped rasters to match the first one's grid
|
|
||||||
reference_grid <- cropped_rasters[[1]]
|
|
||||||
|
|
||||||
aligned_rasters <- lapply(cropped_rasters, function(r) {
|
|
||||||
if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) &&
|
|
||||||
isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) {
|
|
||||||
return(r) # Already aligned
|
|
||||||
}
|
|
||||||
terra::resample(r, reference_grid, method = "near")
|
|
||||||
})
|
|
||||||
|
|
||||||
# Create max composite using mosaic on aligned rasters
|
|
||||||
raster_collection <- terra::sprc(aligned_rasters)
|
|
||||||
max_mosaic <- terra::mosaic(raster_collection, fun = "max")
|
|
||||||
}
|
|
||||||
|
|
||||||
max_mosaic
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Max composite creation failed:", e$message), "WARNING")
|
|
||||||
safe_log("Using first raster only as fallback")
|
|
||||||
all_rasters[[1]]
|
|
||||||
})
|
|
||||||
safe_log(paste("Max composite created - taking clearest pixel at each location"))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI
|
|
||||||
required_bands <- c("Red", "Green", "Blue", "NIR", "CI")
|
|
||||||
available_bands <- names(mosaic)
|
|
||||||
|
|
||||||
# Check if all required bands are present
|
|
||||||
if (!all(required_bands %in% available_bands)) {
|
|
||||||
safe_log(paste("Warning: Not all required bands found. Available:", paste(available_bands, collapse = ", ")), "WARNING")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Select only the required bands in the correct order
|
|
||||||
if (all(required_bands %in% available_bands)) {
|
|
||||||
mosaic <- mosaic[[required_bands]]
|
|
||||||
safe_log("Selected Red, Green, Blue, NIR, CI bands")
|
|
||||||
} else {
|
|
||||||
safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5 (R, G, B, NIR, CI)"), "WARNING")
|
|
||||||
if (terra::nlyr(mosaic) > 5) {
|
|
||||||
# Keep only first 5 bands as fallback
|
|
||||||
mosaic <- terra::subset(mosaic, 1:5)
|
|
||||||
safe_log("Keeping only first 5 bands as fallback")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Crop/mask to field boundaries if provided
|
|
||||||
if (!is.null(field_boundaries)) {
|
|
||||||
tryCatch({
|
|
||||||
mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE)
|
|
||||||
safe_log("Mosaic cropped to field boundaries")
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Could not crop to field boundaries:", e$message), "WARNING")
|
|
||||||
# Return uncropped mosaic
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# Log final mosaic summary
|
|
||||||
safe_log(paste("✓ Mosaic created from", mosaic_type, "-", terra::nlyr(mosaic),
|
|
||||||
"bands,", nrow(mosaic), "x", ncol(mosaic), "pixels"))
|
|
||||||
|
|
||||||
return(mosaic)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Save a mosaic raster to disk
#'
#' Writes the mosaic GeoTIFF into `output_dir`, optionally writes the attached
#' cloud/shadow mask alongside it, and optionally renders diagnostic plots.
#'
#' @param mosaic_raster A SpatRaster object to save
#' @param output_dir Directory to save the output
#' @param file_name Filename for the output raster
#' @param plot_result Whether to create visualizations (default: FALSE)
#' @param save_mask Whether to save cloud masks separately (default: FALSE)
#' @return The file path of the saved raster
#'
save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALSE, save_mask = FALSE) {
  # Guard clause: refuse to proceed without a raster.
  if (is.null(mosaic_raster)) {
    stop("No mosaic raster provided to save")
  }

  # Make sure the destination exists, then resolve the full output path.
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  out_path <- here::here(output_dir, file_name)

  # A cloud mask may have been attached upstream as a raster attribute.
  mask_layer <- attr(mosaic_raster, "cloud_mask")

  # Write the mosaic itself.
  terra::writeRaster(mosaic_raster, out_path, overwrite = TRUE)

  # Optionally persist the cloud/shadow mask next to the mosaic.
  if (!is.null(mask_layer)) {
    if (save_mask) {
      # Derive "<name>_mask.<ext>" from the mosaic filename, preserving the
      # case of the .tif/.TIF extension.
      mask_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
      mask_path <- here::here(output_dir, mask_name)
      terra::writeRaster(mask_layer, mask_path, overwrite = TRUE)
      safe_log(paste("Cloud/shadow mask saved to:", mask_path))
    } else {
      safe_log("Cloud mask available but not saved (save_mask = FALSE)")
    }
  }

  # Diagnostic visualisations, on request only.
  if (plot_result) {
    band_names <- names(mosaic_raster)
    has_rgb <- all(c("Red", "Green", "Blue") %in% band_names)

    # CI band heat map
    if ("CI" %in% band_names) {
      terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
    }

    # True-colour composite
    if (has_rgb) {
      terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
    }

    # Binary cloud/shadow mask (red = cloud/shadow, green = clear)
    if (!is.null(mask_layer)) {
      terra::plot(mask_layer, main = paste("Cloud/shadow mask", file_name),
                  col = c("red", "green"))
    }

    # Side-by-side comparison when both RGB bands and a mask are available.
    if (has_rgb && !is.null(mask_layer)) {
      old_par <- par(mfrow = c(1, 2))
      terra::plotRGB(mosaic_raster, main = "RGB Image")
      terra::plot(mask_layer, main = "Cloud/Shadow Mask", col = c("red", "green"))
      par(old_par)
    }
  }

  safe_log(paste("Mosaic saved to:", out_path))

  return(out_path)
}
|
|
||||||
|
|
||||||
#' Create weekly mosaic from pre-split tiles with MAX aggregation
#'
#' This function processes tiles created by Script 01 and processed by Script 02.
#' For each of the 25 tiles independently:
#' 1. Collects that tile from all dates in the range
#' 2. Calculates cloud coverage per date
#' 3. Uses create_mosaic logic to select best dates (cloud-clean preferred)
#' 4. Creates MAX composite for that tile
#' 5. Saves to weekly_tile_max/tile_XX.tif
#'
#' Input: merged_final_tif/[DATE]/[TILE_01.tif, TILE_02.tif, ..., TILE_25.tif]
#' Output: weekly_tile_max/tile_01.tif through tile_25.tif (25 weekly MAX tiles)
#'
#' @param dates List from date_list() containing days_filter vector
#' @param merged_final_dir Directory containing processed tiles (merged_final_tif)
#' @param tile_output_dir Directory to save weekly MAX tiles (weekly_tile_max)
#' @param field_boundaries Field boundaries for cloud coverage calculation (optional)
#' @return List of paths to created tile files
#'
create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) {

  safe_log("Starting per-tile mosaic creation with cloud-based date selection...")

  # Ensure the output directory exists.
  dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE)

  # ---- Step 1: group every tile file by its tile ID across all dates ----
  # tile_groups[["01"]] holds the paths for tile 01 on every available date.
  tile_groups <- list()

  for (date in dates$days_filter) {
    date_dir <- file.path(merged_final_dir, date)

    if (!dir.exists(date_dir)) {
      safe_log(paste("  Skipping date:", date, "- directory not found"), "WARNING")
      next
    }

    date_tiles <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE)

    if (length(date_tiles) == 0) {
      safe_log(paste("  No tiles found for date:", date), "WARNING")
      next
    }

    # Filename convention "2026-01-02_01.tif" -> tile ID "01".
    for (tile_path in date_tiles) {
      tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", basename(tile_path))
      if (is.null(tile_groups[[tile_id]])) {
        tile_groups[[tile_id]] <- list()
      }
      tile_groups[[tile_id]] <- c(tile_groups[[tile_id]], tile_path)
    }
  }

  if (length(tile_groups) == 0) {
    stop("No tiles found in date range")
  }

  safe_log(paste("Found", length(tile_groups), "unique tiles across all dates"))

  # ---- Step 2: build a weekly MAX composite independently for each tile ----
  created_tiles <- character()

  for (tile_id in names(tile_groups)) {
    files_for_tile <- unlist(tile_groups[[tile_id]])

    safe_log(paste("Processing tile", tile_id, "with", length(files_for_tile), "dates"))

    # 2a: per-date cloud coverage for this tile (NULL on failure -> use all dates)
    cloud_stats <- tryCatch(
      count_cloud_coverage_for_tile(
        tile_files = files_for_tile,
        field_boundaries = field_boundaries
      ),
      error = function(e) {
        safe_log(paste("  Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING")
        NULL
      }
    )

    if (is.null(cloud_stats) || nrow(cloud_stats) == 0) {
      safe_log(paste("  No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING")
      cloud_stats <- NULL
    }

    # 2b: MAX mosaic for this tile via the shared create_mosaic logic.
    tile_mosaic <- tryCatch(
      create_mosaic(
        tif_files = files_for_tile,
        cloud_coverage_stats = cloud_stats,
        field_boundaries = NULL # Don't crop individual tiles
      ),
      error = function(e) {
        safe_log(paste("  Error creating mosaic for tile", tile_id, "-", e$message), "WARNING")
        NULL
      }
    )

    if (is.null(tile_mosaic)) {
      safe_log(paste("  Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING")
      next
    }

    # Diagnostics before writing to disk.
    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "dimensions:", nrow(tile_mosaic), "x", ncol(tile_mosaic)))
    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "bands:", terra::nlyr(tile_mosaic)))

    first_band <- tile_mosaic[[1]]
    band_min <- terra::global(first_band, fun = "min", na.rm = TRUE)$min
    band_max <- terra::global(first_band, fun = "max", na.rm = TRUE)$max
    safe_log(paste("  DEBUG: Band 1 MIN=", round(band_min, 2), "MAX=", round(band_max, 2)))

    # 2c: write this tile's weekly MAX mosaic.
    # Filename format: week_WW_YYYY_TT.tif (e.g. week_02_2026_01.tif).
    out_name <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_",
                       sprintf("%02d", as.integer(tile_id)), ".tif")
    out_path <- file.path(tile_output_dir, out_name)

    tryCatch({
      terra::writeRaster(tile_mosaic, out_path, overwrite = TRUE)
      safe_log(paste("  ✓ Saved tile", tile_id, "to:", out_name))
      created_tiles <- c(created_tiles, out_path)
    }, error = function(e) {
      safe_log(paste("  Error saving tile", tile_id, "-", e$message), "WARNING")
    })
  }

  safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir))

  return(created_tiles)
}
|
|
||||||
|
|
||||||
#' Calculate cloud coverage for a single tile across multiple dates
#'
#' Helper function for per-tile cloud assessment.
#' Takes tile files from different dates and calculates cloud coverage for each.
#' Cloud coverage is calculated ONLY within field boundaries, so total_pixels
#' varies per tile based on which fields are present in that tile area.
#'
#' @param tile_files Character vector of tile file paths from different dates
#' @param field_boundaries Field boundaries for analysis (required for per-field counting)
#' @return Data frame with cloud stats for each date/tile (one row per file),
#'   or NULL when no files are supplied
#'
count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) {
  if (length(tile_files) == 0) {
    warning("No tile files provided for cloud coverage calculation")
    return(NULL)
  }

  aggregated_results <- list()

  for (idx in seq_along(tile_files)) {
    tile_file <- tile_files[idx]

    tryCatch({
      # Load the tile
      current_raster <- terra::rast(tile_file)

      # Extract the CI band (last band in 5-band output)
      ci_band <- current_raster[[terra::nlyr(current_raster)]]

      if (!is.null(field_boundaries)) {
        # Build a constant template (same extent/resolution/CRS as ci_band but
        # independent of its data) so the field mask reflects the POTENTIAL
        # field area even when ci_band is entirely cloudy (all NA).
        ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band),
                                    crs = terra::crs(ci_band))
        terra::values(ref_template) <- 1 # Fill entire raster with 1s

        # Crop/mask to field boundaries: 1 inside fields, NA outside.
        field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE)

        # Total potential field pixels (independent of clouds).
        total_pixels <- terra::global(field_mask, fun = "notNA")$notNA

        # Actual valid (non-cloud) pixels inside the fields.
        ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE)
        total_notna <- terra::global(ci_masked, fun = "notNA")$notNA
      } else {
        # No field boundaries: assess the entire tile.
        total_notna <- terra::global(ci_band, fun = "notNA")$notNA
        total_pixels <- terra::ncell(ci_band)
      }

      # Missing (= cloudy) percentage. FIX: guard against a zero or NA
      # denominator, which occurs when no field pixels fall inside this
      # tile; the old code produced NaN here. Treat that case as 100% missing.
      missing_pct <- if (is.na(total_pixels) || total_pixels <= 0) {
        100
      } else {
        round(100 - ((total_notna / total_pixels) * 100))
      }

      aggregated_results[[idx]] <- data.frame(
        filename = basename(tile_file), # Keep full filename: 2026-01-07_03.tif
        notNA = total_notna,
        total_pixels = total_pixels,
        missing_pixels_percentage = missing_pct,
        thres_5perc = as.integer(missing_pct < 5),
        # NOTE(review): column name says 40%, but the cutoff used is 45.
        # Kept at 45 to preserve downstream date-selection behavior — confirm
        # which value is intended before renaming or changing it.
        thres_40perc = as.integer(missing_pct < 45),
        stringsAsFactors = FALSE
      )

    }, error = function(e) {
      safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING")
      # <<- because this handler runs in its own function environment.
      # FIX: use basename() for consistency with the success path above, so
      # downstream joins on `filename` see one key format.
      aggregated_results[[idx]] <<- data.frame(
        filename = basename(tile_file),
        notNA = NA_real_,
        total_pixels = NA_real_,
        missing_pixels_percentage = 100, # fully missing on read failure
        thres_5perc = 0,
        thres_40perc = 0,
        stringsAsFactors = FALSE
      )
    })
  }

  # Combine per-date rows into one data frame (empty frame if none succeeded).
  aggregated_df <- if (length(aggregated_results) > 0) {
    do.call(rbind, aggregated_results)
  } else {
    data.frame()
  }

  return(aggregated_df)
}
|
|
||||||
|
|
||||||
|
|
@ -1,28 +1,55 @@
|
||||||
# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION)
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# UNIFIED KPI CALCULATION SCRIPT
|
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
|
||||||
|
# ============================================================================
|
||||||
|
# PURPOSE:
|
||||||
|
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
|
||||||
|
# field uniformity, growth trends (4-week and 8-week), area change detection,
|
||||||
|
# TCH forecasts, stress identification, and weed presence. Generates
|
||||||
|
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
|
||||||
#
|
#
|
||||||
# This script combines:
|
# INPUT DATA:
|
||||||
# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses)
|
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
|
||||||
# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators)
|
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
|
||||||
|
# - Source 3: Historical field stats (RDS from previous weeks)
|
||||||
#
|
#
|
||||||
# FEATURES:
|
# OUTPUT DATA:
|
||||||
# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.)
|
# - Destination: laravel_app/storage/app/{project}/output/
|
||||||
# - Farm-level KPI calculation (6 metrics for executive overview)
|
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
|
||||||
# - Parallel processing (tile-aware, 1000+ fields supported)
|
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
|
||||||
# - Comprehensive Excel + RDS + CSV exports (21 columns per spec)
|
|
||||||
# - Test mode for development
|
|
||||||
|
|
||||||
# CRITICAL INTEGRATIONS:
|
|
||||||
#
|
#
|
||||||
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
|
# USAGE:
|
||||||
# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
|
# Rscript 80_calculate_kpis.R [project] [week] [year]
|
||||||
# Columns: field, imminent_prob, detected_prob, week, year
|
|
||||||
# [✓] LEFT JOIN to field_analysis_df by field
|
|
||||||
# [✓] Use actual harvest probability data instead of placeholder
|
|
||||||
#
|
#
|
||||||
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
|
# Example (Windows PowerShell):
|
||||||
# [✓] Load harvest.xlsx with planting_date (season_start)
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||||||
|
# - week: ISO week number (numeric, 1-53, default current week)
|
||||||
|
# - year: ISO year (numeric, default current year)
|
||||||
|
#
|
||||||
|
# CLIENT TYPES:
|
||||||
|
# - cane_supply (ANGATA): Yes - uses 80_utils_cane_supply.R (placeholder)
|
||||||
|
# - agronomic_support (AURA): Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
|
||||||
|
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
|
||||||
|
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
|
||||||
|
# - Input data: Weekly mosaic TIFFs (Script 40 output)
|
||||||
|
# - Data directories: weekly_mosaic/, output/ (created if missing)
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
|
||||||
|
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
|
||||||
|
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
|
||||||
|
# - Exports: 21-column output with field-level metrics, phase, status, alerts
|
||||||
|
# - Supports 4-week and 8-week trend analysis from historical RDS cache
|
||||||
|
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
|
||||||
|
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
# [✓] Extract planting dates per field
|
# [✓] Extract planting dates per field
|
||||||
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
||||||
#
|
#
|
||||||
|
|
@ -96,48 +123,77 @@ WEEKS_FOR_CV_TREND_LONG <- 8
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
suppressPackageStartupMessages({
|
suppressPackageStartupMessages({
|
||||||
library(here)
|
# File path handling
|
||||||
library(sf)
|
library(here) # For relative path resolution (platform-independent file paths)
|
||||||
library(terra)
|
|
||||||
library(dplyr)
|
# Spatial data handling
|
||||||
library(tidyr)
|
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||||||
library(lubridate)
|
library(terra) # For raster operations (reading mosaic TIFFs)
|
||||||
library(readr)
|
|
||||||
library(readxl)
|
# Data manipulation
|
||||||
library(writexl)
|
library(dplyr) # For data wrangling (filter, mutate, group_by, summarize)
|
||||||
library(purrr)
|
library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather)
|
||||||
library(furrr)
|
library(lubridate) # For date/time operations (week extraction, date arithmetic)
|
||||||
library(future)
|
|
||||||
library(caret)
|
# File I/O
|
||||||
library(CAST)
|
library(readr) # For reading CSV files (harvest predictions from Python)
|
||||||
library(randomForest)
|
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
|
||||||
|
library(writexl) # For writing Excel outputs (KPI summary tables)
|
||||||
|
library(progress) # For progress bars during field processing
|
||||||
|
|
||||||
|
# ML/Analysis (optional - only for harvest model inference)
|
||||||
tryCatch({
|
tryCatch({
|
||||||
library(torch)
|
library(torch) # For PyTorch model inference (harvest readiness prediction)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
message("Note: torch package not available - harvest model inference will be skipped")
|
message("Note: torch package not available - harvest model inference will be skipped")
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# LOAD CONFIGURATION - MUST BE DONE FIRST
|
||||||
|
# ============================================================================
|
||||||
|
# Parameters must be loaded early to determine client type and paths
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
source(here("r_app", "parameters_project.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("Error loading parameters_project.R: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Get client configuration from global project setup
|
||||||
|
# NOTE: This cannot be done until parameters_project.R is sourced
|
||||||
|
# We determine client_type from the current project_dir (if running in main() context)
|
||||||
|
# For now, set a placeholder that will be overridden in main()
|
||||||
|
if (exists("project_dir", envir = .GlobalEnv)) {
|
||||||
|
temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
|
||||||
|
} else {
|
||||||
|
temp_client_type <- "cane_supply" # Safe default
|
||||||
|
}
|
||||||
|
temp_client_config <- get_client_kpi_config(temp_client_type)
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
|
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
# All clients use the common utilities (shared statistical functions, reporting)
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here("r_app", "80_weekly_stats_utils.R"))
|
source(here("r_app", "80_utils_common.R"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading 80_weekly_stats_utils.R: ", e$message)
|
stop("Error loading 80_utils_common.R: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Load both client-specific utilities (functions will be available for both workflows)
|
||||||
|
# This avoids needing to determine client type at startup time
|
||||||
|
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
|
||||||
|
tryCatch({
|
||||||
|
source(here("r_app", "80_utils_agronomic_support.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here("r_app", "80_report_building_utils.R"))
|
source(here("r_app", "80_utils_cane_supply.R"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading 80_report_building_utils.R: ", e$message)
|
stop("Error loading 80_utils_cane_supply.R: ", e$message)
|
||||||
})
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
source(here("r_app", "kpi_utils.R"))
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Error loading kpi_utils.R: ", e$message)
|
|
||||||
})
|
})
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -176,9 +232,6 @@ STATUS_TRIGGERS <- data.frame(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# MAIN
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
# Parse command-line arguments
|
# Parse command-line arguments
|
||||||
|
|
@ -187,7 +240,8 @@ main <- function() {
|
||||||
# end_date (arg 1)
|
# end_date (arg 1)
|
||||||
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
|
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
|
||||||
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
|
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
|
||||||
as.Date(args[1])
|
# Parse date explicitly in YYYY-MM-DD format from command line
|
||||||
|
as.Date(args[1], format = "%Y-%m-%d")
|
||||||
} else if (exists("end_date", envir = .GlobalEnv)) {
|
} else if (exists("end_date", envir = .GlobalEnv)) {
|
||||||
global_date <- get("end_date", envir = .GlobalEnv)
|
global_date <- get("end_date", envir = .GlobalEnv)
|
||||||
# Check if it's a valid Date with length > 0
|
# Check if it's a valid Date with length > 0
|
||||||
|
|
@ -233,7 +287,7 @@ main <- function() {
|
||||||
message(strrep("=", 70))
|
message(strrep("=", 70))
|
||||||
message("Date:", format(end_date, "%Y-%m-%d"))
|
message("Date:", format(end_date, "%Y-%m-%d"))
|
||||||
message("Project:", project_dir)
|
message("Project:", project_dir)
|
||||||
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
|
message("Mode: Conditional KPI calculation based on client type")
|
||||||
message("")
|
message("")
|
||||||
|
|
||||||
# Load configuration and utilities
|
# Load configuration and utilities
|
||||||
|
|
@ -245,48 +299,151 @@ main <- function() {
|
||||||
stop("Error loading parameters_project.R: ", e$message)
|
stop("Error loading parameters_project.R: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Initialize project directories and configuration
|
||||||
|
setup <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
|
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
|
||||||
|
client_type <- get_client_type(project_dir)
|
||||||
|
client_config <- get_client_kpi_config(client_type)
|
||||||
|
|
||||||
|
# Assign to global environment so utilities and downstream scripts can access it
|
||||||
|
assign("client_config", client_config, envir = .GlobalEnv)
|
||||||
|
|
||||||
|
message("Client Type:", client_type)
|
||||||
|
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
|
||||||
|
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
|
||||||
|
|
||||||
|
# Use centralized paths from setup object (no need for file.path calls)
|
||||||
|
weekly_mosaic <- setup$weekly_mosaic_dir
|
||||||
|
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here("r_app", "30_growth_model_utils.R"))
|
source(here("r_app", "30_growth_model_utils.R"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
|
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
|
||||||
})
|
})
|
||||||
|
|
||||||
# ========== PER-FIELD ANALYSIS (SC-64) ==========
|
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
|
||||||
|
# AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
|
||||||
|
message("\n", strrep("=", 70))
|
||||||
|
message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
||||||
|
message(strrep("=", 70))
|
||||||
|
|
||||||
|
# Prepare inputs for KPI calculation (already created by setup_project_directories)
|
||||||
|
reports_dir_kpi <- setup$kpi_reports_dir
|
||||||
|
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
||||||
|
|
||||||
|
# Load field boundaries for AURA workflow (use data_dir from setup)
|
||||||
|
message("\nLoading field boundaries for AURA KPI calculation...")
|
||||||
|
tryCatch({
|
||||||
|
boundaries_result <- load_field_boundaries(setup$data_dir)
|
||||||
|
|
||||||
|
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
||||||
|
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
||||||
|
} else {
|
||||||
|
field_boundaries_sf <- boundaries_result
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nrow(field_boundaries_sf) == 0) {
|
||||||
|
stop("No fields loaded from boundaries")
|
||||||
|
}
|
||||||
|
|
||||||
|
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("ERROR loading field boundaries: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Load harvesting data
|
||||||
|
if (!exists("harvesting_data")) {
|
||||||
|
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
|
||||||
|
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract current week/year from end_date
|
||||||
|
current_week <- as.numeric(format(end_date, "%V"))
|
||||||
|
current_year <- as.numeric(format(end_date, "%G"))
|
||||||
|
|
||||||
|
# Call with correct signature
|
||||||
|
kpi_results <- calculate_all_kpis(
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
current_week = current_week,
|
||||||
|
current_year = current_year,
|
||||||
|
current_mosaic_dir = setup$weekly_mosaic_dir,
|
||||||
|
harvesting_data = harvesting_data,
|
||||||
|
ci_rds_path = cumulative_CI_vals_dir,
|
||||||
|
output_dir = reports_dir_kpi
|
||||||
|
)
|
||||||
|
|
||||||
|
cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")
|
||||||
|
cat("Summary tables saved for Script 90 integration\n")
|
||||||
|
cat("Output directory:", reports_dir_kpi, "\n\n")
|
||||||
|
|
||||||
|
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
|
||||||
|
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
|
||||||
|
message("\n", strrep("=", 70))
|
||||||
|
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||||
|
message(strrep("=", 70))
|
||||||
|
|
||||||
|
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
|
||||||
|
reports_dir <- setup$kpi_reports_dir
|
||||||
|
data_dir <- setup$data_dir
|
||||||
|
|
||||||
|
# Continue with existing per-field analysis code below
|
||||||
|
|
||||||
message("\n", strrep("-", 70))
|
message("\n", strrep("-", 70))
|
||||||
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
|
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
||||||
message(strrep("-", 70))
|
message(strrep("-", 70))
|
||||||
|
|
||||||
# Calculate ISO week numbers and ISO years using helper from kpi_utils.R
|
|
||||||
weeks <- calculate_week_numbers(end_date)
|
weeks <- calculate_week_numbers(end_date)
|
||||||
current_week <- weeks$current_week
|
current_week <- weeks$current_week
|
||||||
current_iso_year <- weeks$current_iso_year
|
current_year <- weeks$current_year
|
||||||
previous_week <- weeks$previous_week
|
previous_week <- weeks$previous_week
|
||||||
previous_iso_year <- weeks$previous_iso_year
|
previous_year <- weeks$previous_year
|
||||||
|
|
||||||
message(paste("Week:", current_week, "/ ISO Year:", current_iso_year))
|
message(paste("Week:", current_week, "/ Year (ISO 8601):", year))
|
||||||
|
|
||||||
# Find tile files - approach from Script 20
|
# Find per-field weekly mosaics
|
||||||
message("Finding tile files...")
|
message("Finding per-field weekly mosaics...")
|
||||||
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, current_iso_year)
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
||||||
|
|
||||||
# Detect grid size subdirectory
|
if (!dir.exists(weekly_mosaic)) {
|
||||||
detected_grid_size <- NA
|
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
||||||
if (dir.exists(weekly_tile_max)) {
|
"\nScript 40 (mosaic creation) must be run first."))
|
||||||
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
|
}
|
||||||
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
|
||||||
if (length(grid_patterns) > 0) {
|
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
||||||
detected_grid_size <- grid_patterns[1]
|
field_dirs <- field_dirs[field_dirs != ""]
|
||||||
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
|
|
||||||
message(paste(" Using grid-size subdirectory:", detected_grid_size))
|
if (length(field_dirs) == 0) {
|
||||||
|
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
||||||
|
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Verify we have mosaics for this week
|
||||||
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
||||||
|
per_field_files <- c()
|
||||||
|
for (field in field_dirs) {
|
||||||
|
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
||||||
|
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
||||||
|
if (length(files) > 0) {
|
||||||
|
per_field_files <- c(per_field_files, files)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
if (length(per_field_files) == 0) {
|
||||||
if (length(tile_files) == 0) {
|
stop(paste("ERROR: No mosaics found for week", current_week, "year", year,
|
||||||
stop(paste("No tile files found for week", current_week, current_iso_year, "in", mosaic_dir))
|
"\nExpected pattern:", single_file_pattern,
|
||||||
|
"\nChecked:", weekly_mosaic))
|
||||||
}
|
}
|
||||||
message(paste(" Found", length(tile_files), "tiles"))
|
|
||||||
|
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
||||||
|
|
||||||
|
mosaic_mode <- "per-field"
|
||||||
|
mosaic_dir <- weekly_mosaic
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Load field boundaries
|
# Load field boundaries
|
||||||
tryCatch({
|
tryCatch({
|
||||||
|
|
@ -316,10 +473,11 @@ main <- function() {
|
||||||
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
|
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
|
||||||
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
||||||
|
|
||||||
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir,
|
historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir,
|
||||||
num_weeks = num_weeks_to_load,
|
num_weeks = num_weeks_to_load,
|
||||||
auto_generate = allow_auto_gen,
|
auto_generate = allow_auto_gen,
|
||||||
field_boundaries_sf = field_boundaries_sf)
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
daily_vals_dir = daily_vals_dir)
|
||||||
|
|
||||||
# Load harvest.xlsx for planting dates (season_start)
|
# Load harvest.xlsx for planting dates (season_start)
|
||||||
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
||||||
|
|
@ -353,14 +511,15 @@ main <- function() {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
# Build per-field configuration
|
||||||
# ============================================================================
|
message("\nPreparing mosaic configuration for statistics calculation...")
|
||||||
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
|
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Build tile grid (needed by calculate_field_statistics)
|
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
|
||||||
message("\nBuilding tile grid for current week...")
|
field_grid <- list(
|
||||||
tile_grid <- build_tile_grid(mosaic_dir, current_week, current_iso_year)
|
mosaic_dir = mosaic_dir,
|
||||||
|
mode = "per-field"
|
||||||
|
)
|
||||||
|
|
||||||
message("\nUsing modular RDS-based approach for weekly statistics...")
|
message("\nUsing modular RDS-based approach for weekly statistics...")
|
||||||
|
|
||||||
|
|
@ -371,7 +530,7 @@ main <- function() {
|
||||||
year = current_iso_year,
|
year = current_iso_year,
|
||||||
project_dir = project_dir,
|
project_dir = project_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
mosaic_dir = tile_grid$mosaic_dir,
|
mosaic_dir = field_grid$mosaic_dir,
|
||||||
reports_dir = reports_dir,
|
reports_dir = reports_dir,
|
||||||
report_date = end_date
|
report_date = end_date
|
||||||
)
|
)
|
||||||
|
|
@ -386,10 +545,10 @@ main <- function() {
|
||||||
|
|
||||||
prev_stats <- load_or_calculate_weekly_stats(
|
prev_stats <- load_or_calculate_weekly_stats(
|
||||||
week_num = previous_week,
|
week_num = previous_week,
|
||||||
year = previous_iso_year,
|
year = previous_year,
|
||||||
project_dir = project_dir,
|
project_dir = project_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
mosaic_dir = tile_grid$mosaic_dir,
|
mosaic_dir = field_grid$mosaic_dir,
|
||||||
reports_dir = reports_dir,
|
reports_dir = reports_dir,
|
||||||
report_date = prev_report_date
|
report_date = prev_report_date
|
||||||
)
|
)
|
||||||
|
|
@ -409,9 +568,11 @@ main <- function() {
|
||||||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
||||||
|
|
||||||
# Load weekly harvest probabilities from script 31 (if available)
|
# Load weekly harvest probabilities from script 31 (if available)
|
||||||
|
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
|
||||||
message("\n4. Loading harvest probabilities from script 31...")
|
message("\n4. Loading harvest probabilities from script 31...")
|
||||||
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
|
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
|
||||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_iso_year))
|
harvest_prob_file <- file.path(harvest_prob_dir,
|
||||||
|
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
|
||||||
message(paste(" Looking for:", harvest_prob_file))
|
message(paste(" Looking for:", harvest_prob_file))
|
||||||
|
|
||||||
imminent_prob_data <- tryCatch({
|
imminent_prob_data <- tryCatch({
|
||||||
|
|
@ -854,6 +1015,14 @@ main <- function() {
|
||||||
cat(" - Per-field data exported\n")
|
cat(" - Per-field data exported\n")
|
||||||
cat(" - Farm-level KPIs calculated\n")
|
cat(" - Farm-level KPIs calculated\n")
|
||||||
cat(" - All outputs in:", reports_dir, "\n\n")
|
cat(" - All outputs in:", reports_dir, "\n\n")
|
||||||
|
|
||||||
|
} else {
|
||||||
|
# Unknown client type - log warning and exit
|
||||||
|
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
||||||
|
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
||||||
|
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
||||||
|
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sys.nframe() == 0) {
|
if (sys.nframe() == 0) {
|
||||||
|
|
|
||||||
|
|
@ -1,258 +0,0 @@
|
||||||
# 80_REPORT_BUILDING_UTILS.R
|
|
||||||
# ============================================================================
|
|
||||||
# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT
|
|
||||||
#
|
|
||||||
# This file contains reusable functions for:
|
|
||||||
# - Field analysis summary generation
|
|
||||||
# - Excel/CSV/RDS export functionality
|
|
||||||
# - Farm-level KPI aggregation and summary
|
|
||||||
# - Tile-based KPI extraction (alternative calculation method)
|
|
||||||
#
|
|
||||||
# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# SUMMARY GENERATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
generate_field_analysis_summary <- function(field_df) {
|
|
||||||
message("Generating summary statistics...")
|
|
||||||
|
|
||||||
total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
|
|
||||||
|
|
||||||
germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE)
|
|
||||||
tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE)
|
|
||||||
grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE)
|
|
||||||
maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE)
|
|
||||||
unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE)
|
|
||||||
|
|
||||||
harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
|
|
||||||
stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
|
|
||||||
recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
|
|
||||||
growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
|
|
||||||
germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
|
|
||||||
germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
|
|
||||||
no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
|
|
||||||
|
|
||||||
clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
|
|
||||||
partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
|
|
||||||
no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
|
|
||||||
total_fields <- nrow(field_df)
|
|
||||||
|
|
||||||
clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
|
|
||||||
partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
|
|
||||||
no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
|
|
||||||
|
|
||||||
summary_df <- data.frame(
|
|
||||||
Category = c(
|
|
||||||
"--- PHASE DISTRIBUTION ---",
|
|
||||||
"Germination",
|
|
||||||
"Tillering",
|
|
||||||
"Grand Growth",
|
|
||||||
"Maturation",
|
|
||||||
"Unknown phase",
|
|
||||||
"--- STATUS TRIGGERS ---",
|
|
||||||
"Harvest ready",
|
|
||||||
"Stress detected",
|
|
||||||
"Strong recovery",
|
|
||||||
"Growth on track",
|
|
||||||
"Germination complete",
|
|
||||||
"Germination started",
|
|
||||||
"No trigger",
|
|
||||||
"--- CLOUD COVERAGE (FIELDS) ---",
|
|
||||||
"Clear view",
|
|
||||||
"Partial coverage",
|
|
||||||
"No image available",
|
|
||||||
"--- CLOUD COVERAGE (ACREAGE) ---",
|
|
||||||
"Clear view",
|
|
||||||
"Partial coverage",
|
|
||||||
"No image available",
|
|
||||||
"--- TOTAL ---",
|
|
||||||
"Total Acreage"
|
|
||||||
),
|
|
||||||
Acreage = c(
|
|
||||||
NA,
|
|
||||||
round(germination_acreage, 2),
|
|
||||||
round(tillering_acreage, 2),
|
|
||||||
round(grand_growth_acreage, 2),
|
|
||||||
round(maturation_acreage, 2),
|
|
||||||
round(unknown_phase_acreage, 2),
|
|
||||||
NA,
|
|
||||||
round(harvest_ready_acreage, 2),
|
|
||||||
round(stress_acreage, 2),
|
|
||||||
round(recovery_acreage, 2),
|
|
||||||
round(growth_on_track_acreage, 2),
|
|
||||||
round(germination_complete_acreage, 2),
|
|
||||||
round(germination_started_acreage, 2),
|
|
||||||
round(no_trigger_acreage, 2),
|
|
||||||
NA,
|
|
||||||
paste0(clear_fields, " fields"),
|
|
||||||
paste0(partial_fields, " fields"),
|
|
||||||
paste0(no_image_fields, " fields"),
|
|
||||||
NA,
|
|
||||||
round(clear_acreage, 2),
|
|
||||||
round(partial_acreage, 2),
|
|
||||||
round(no_image_acreage, 2),
|
|
||||||
NA,
|
|
||||||
round(total_acreage, 2)
|
|
||||||
),
|
|
||||||
stringsAsFactors = FALSE
|
|
||||||
)
|
|
||||||
|
|
||||||
return(summary_df)
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# EXPORT FUNCTIONS
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
|
|
||||||
message("Exporting per-field analysis to Excel, CSV, and RDS...")
|
|
||||||
|
|
||||||
field_df_rounded <- field_df %>%
|
|
||||||
mutate(across(where(is.numeric), ~ round(., 2)))
|
|
||||||
|
|
||||||
# Handle NULL summary_df
|
|
||||||
summary_df_rounded <- if (!is.null(summary_df)) {
|
|
||||||
summary_df %>%
|
|
||||||
mutate(across(where(is.numeric), ~ round(., 2)))
|
|
||||||
} else {
|
|
||||||
NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
|
|
||||||
if (!dir.exists(output_subdir)) {
|
|
||||||
dir.create(output_subdir, recursive = TRUE)
|
|
||||||
}
|
|
||||||
|
|
||||||
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
|
|
||||||
excel_path <- file.path(output_subdir, excel_filename)
|
|
||||||
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
|
||||||
|
|
||||||
# Build sheets list dynamically
|
|
||||||
sheets <- list(
|
|
||||||
"Field Data" = field_df_rounded
|
|
||||||
)
|
|
||||||
if (!is.null(summary_df_rounded)) {
|
|
||||||
sheets[["Summary"]] <- summary_df_rounded
|
|
||||||
}
|
|
||||||
|
|
||||||
write_xlsx(sheets, excel_path)
|
|
||||||
message(paste("✓ Field analysis Excel exported to:", excel_path))
|
|
||||||
|
|
||||||
kpi_data <- list(
|
|
||||||
field_analysis = field_df_rounded,
|
|
||||||
field_analysis_summary = summary_df_rounded,
|
|
||||||
metadata = list(
|
|
||||||
current_week = current_week,
|
|
||||||
year = year,
|
|
||||||
project = project_dir,
|
|
||||||
created_at = Sys.time()
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
|
|
||||||
rds_path <- file.path(reports_dir, "kpis", rds_filename)
|
|
||||||
|
|
||||||
saveRDS(kpi_data, rds_path)
|
|
||||||
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
|
||||||
|
|
||||||
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
|
|
||||||
csv_path <- file.path(output_subdir, csv_filename)
|
|
||||||
write_csv(field_df_rounded, csv_path)
|
|
||||||
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
|
||||||
|
|
||||||
return(list(excel = excel_path, rds = rds_path, csv = csv_path))
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# TILE-BASED KPI EXTRACTION (Alternative calculation method)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# [COMMENTED OUT / UNUSED - kept for reference]
|
|
||||||
# These functions provide tile-based extraction as an alternative to field_statistics approach
|
|
||||||
# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R
|
|
||||||
# Uncomment if parallel processing of tiles is needed in future
|
|
||||||
|
|
||||||
# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) {
|
|
||||||
# message("Calculating field-level KPI statistics from tiles...")
|
|
||||||
#
|
|
||||||
# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
|
|
||||||
# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE)
|
|
||||||
#
|
|
||||||
# if (length(tile_files) == 0) {
|
|
||||||
# message("No tiles found for week", week_num, year)
|
|
||||||
# return(NULL)
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# message(paste("Processing", length(tile_files), "tiles in parallel..."))
|
|
||||||
#
|
|
||||||
# field_kpi_list <- furrr::future_map(
|
|
||||||
# tile_files,
|
|
||||||
# ~ process_single_kpi_tile(
|
|
||||||
# tile_file = .,
|
|
||||||
# field_boundaries_sf = field_boundaries_sf,
|
|
||||||
# tile_grid = tile_grid
|
|
||||||
# ),
|
|
||||||
# .progress = TRUE,
|
|
||||||
# .options = furrr::furrr_options(seed = TRUE)
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
# field_kpi_stats <- dplyr::bind_rows(field_kpi_list)
|
|
||||||
#
|
|
||||||
# if (nrow(field_kpi_stats) == 0) {
|
|
||||||
# message(" No KPI data extracted from tiles")
|
|
||||||
# return(NULL)
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields"))
|
|
||||||
# return(field_kpi_stats)
|
|
||||||
# }
|
|
||||||
|
|
||||||
# process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) {
|
|
||||||
# # Helper function for calculate_field_kpis_from_tiles
|
|
||||||
# tryCatch({
|
|
||||||
# tile_basename <- basename(tile_file)
|
|
||||||
# tile_raster <- terra::rast(tile_file)
|
|
||||||
# ci_band <- tile_raster[[1]]
|
|
||||||
#
|
|
||||||
# field_bbox <- sf::st_bbox(field_boundaries_sf)
|
|
||||||
# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
|
|
||||||
#
|
|
||||||
# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
|
|
||||||
#
|
|
||||||
# tile_results <- data.frame()
|
|
||||||
# tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename))
|
|
||||||
#
|
|
||||||
# for (field_idx in seq_len(nrow(field_boundaries_sf))) {
|
|
||||||
# field_id <- field_boundaries_sf$field[field_idx]
|
|
||||||
# mean_ci <- extracted_vals[field_idx, 2]
|
|
||||||
#
|
|
||||||
# if (is.na(mean_ci)) {
|
|
||||||
# next
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# tile_results <- rbind(tile_results, data.frame(
|
|
||||||
# field = field_id,
|
|
||||||
# tile_id = tile_id_match,
|
|
||||||
# tile_file = tile_basename,
|
|
||||||
# mean_ci = round(mean_ci, 4),
|
|
||||||
# stringsAsFactors = FALSE
|
|
||||||
# ))
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# return(tile_results)
|
|
||||||
#
|
|
||||||
# }, error = function(e) {
|
|
||||||
# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message))
|
|
||||||
# return(data.frame())
|
|
||||||
# })
|
|
||||||
# }
|
|
||||||
|
|
||||||
# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf,
|
|
||||||
# harvesting_data, cumulative_CI_vals_dir,
|
|
||||||
# weekly_CI_mosaic, reports_dir, current_week, year,
|
|
||||||
# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") {
|
|
||||||
# # Farm-level KPI calculation using tile-based extraction (alternative approach)
|
|
||||||
# # [Implementation kept as reference for alternative calculation method]
|
|
||||||
# }
|
|
||||||
666
r_app/80_utils_agronomic_support.R
Normal file
666
r_app/80_utils_agronomic_support.R
Normal file
|
|
@ -0,0 +1,666 @@
|
||||||
|
# 80_UTILS_AGRONOMIC_SUPPORT.R
|
||||||
|
# ============================================================================
|
||||||
|
# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support)
|
||||||
|
#
|
||||||
|
# Contains all 6 AURA KPI calculation functions and helpers:
|
||||||
|
# - Field uniformity KPI (CV-based, spatial autocorrelation)
|
||||||
|
# - Area change KPI (week-over-week CI changes)
|
||||||
|
# - TCH forecasted KPI (tonnage projections from harvest data)
|
||||||
|
# - Growth decline KPI (trend analysis)
|
||||||
|
# - Weed presence KPI (field fragmentation detection)
|
||||||
|
# - Gap filling KPI (interpolation quality)
|
||||||
|
# - KPI reporting (summary tables, field details, text interpretation)
|
||||||
|
# - KPI export (Excel, RDS, data export)
|
||||||
|
#
|
||||||
|
# Orchestrator: calculate_all_kpis()
|
||||||
|
# Dependencies: 00_common_utils.R (safe_log), sourced from common
|
||||||
|
# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support")
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
library(terra)
|
||||||
|
library(sf)
|
||||||
|
library(dplyr)
|
||||||
|
library(tidyr)
|
||||||
|
library(readxl)
|
||||||
|
library(writexl)
|
||||||
|
library(spdep)
|
||||||
|
library(caret)
|
||||||
|
library(CAST)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R)
|
||||||
|
# ============================================================================
|
||||||
|
# The following helper functions have been moved to 80_utils_common.R:
|
||||||
|
# - calculate_cv()
|
||||||
|
# - calculate_change_percentages()
|
||||||
|
# - calculate_spatial_autocorrelation()
|
||||||
|
# - extract_ci_values()
|
||||||
|
# - calculate_week_numbers()
|
||||||
|
# - load_field_ci_raster()
|
||||||
|
# - load_weekly_ci_mosaic()
|
||||||
|
# - prepare_predictions()  (NOTE: still defined below in this file; not yet moved)
|
||||||
|
#
|
||||||
|
# These are now sourced from common utils and shared by all client types.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Prepare harvest predictions and ensure proper alignment with field data
|
||||||
|
prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") {
  # Prepare harvest predictions aligned with the field data.
  #
  # Applies a scenario multiplier to each field's mean CI to produce a
  # simple per-field tch_forecasted column.
  #
  # Args:
  #   harvest_model: fitted harvest model (currently only checked for NULL).
  #   field_data:    data frame with a mean_ci column.
  #   scenario:      "pessimistic", "realistic" or "optimistic"; anything
  #                  else falls back to the neutral factor 1.0.
  #
  # Returns: field_data with an added tch_forecasted column, or NULL when
  #          inputs are missing or an error occurs.
  if (is.null(harvest_model) || is.null(field_data)) {
    return(NULL)
  }

  tryCatch({
    scenario_factor <- switch(scenario,
                              "pessimistic" = 0.85,
                              "realistic" = 1.0,
                              "optimistic" = 1.15,
                              1.0)

    # Fix: assign the column directly instead of re-indexing the whole frame
    # (field_data$mean_ci) inside a mutate() pipeline -- the original form is
    # a dplyr anti-pattern that silently misaligns if the frame is filtered
    # or regrouped upstream of the transformation.
    predictions <- field_data
    predictions$tch_forecasted <- predictions$mean_ci * scenario_factor

    return(predictions)
  }, error = function(e) {
    message(paste("Error preparing predictions:", e$message))
    return(NULL)
  })
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# AURA KPI CALCULATION FUNCTIONS (6 KPIS)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation
|
||||||
|
#'
|
||||||
|
#' Measures how uniform crop development is across the field.
|
||||||
|
#' Low CV + high positive Moran's I = excellent uniformity
|
||||||
|
#'
|
||||||
|
#' @param ci_pixels_by_field List of CI pixel arrays for each field
|
||||||
|
#' @param field_boundaries_sf SF object with field geometries
|
||||||
|
#' @param ci_band Raster band with CI values
|
||||||
|
#'
|
||||||
|
#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation
|
||||||
|
calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) {
  # KPI 1: field uniformity from CV and (optionally) spatial autocorrelation.
  # Low CV + high positive Moran's I = excellent uniformity.
  #
  # Args:
  #   ci_pixels_by_field:  list of CI pixel vectors, one per field.
  #   field_boundaries_sf: sf object with field geometries.
  #   ci_band:             raster band with CI values; when NULL, Moran's I
  #                        is skipped and a neutral value is used.
  #
  # Returns: data frame with field_idx, cv_value, morans_i,
  #          uniformity_score, interpretation.
  n_fields <- nrow(field_boundaries_sf)

  # Fix: accumulate per-field rows in a pre-sized list and bind once at the
  # end -- rbind() inside the loop is O(n^2) in the number of fields.
  rows <- vector("list", n_fields)

  for (field_idx in seq_len(n_fields)) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]

    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
      rows[[field_idx]] <- data.frame(
        field_idx = field_idx,
        cv_value = NA_real_,
        morans_i = NA_real_,
        uniformity_score = NA_real_,
        interpretation = "No data",
        stringsAsFactors = FALSE
      )
      next
    }

    cv_val <- calculate_cv(ci_pixels)

    # Moran's I only when a raster band is supplied; the helper may return
    # either a bare numeric or a list carrying a $morans_i element.
    morans_i <- NA_real_
    if (!is.null(ci_band)) {
      morans_result <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ])
      morans_i <- if (is.list(morans_result)) morans_result$morans_i else morans_result
    }

    # Normalize CV to 0-1 (0.3 treated as the saturation threshold) and
    # invert so that lower CV yields a higher score.
    cv_normalized <- min(cv_val / 0.3, 1)
    cv_score <- 1 - cv_normalized

    # Shift Moran's I from [-1, 1] onto [0, 1]; neutral 0.5 when unavailable.
    morans_normalized <- if (!is.na(morans_i)) (morans_i + 1) / 2 else 0.5

    # Weighted blend: CV dominates (70%), spatial autocorrelation adds 30%.
    uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized

    # Human-readable label from CV alone.
    interpretation <- if (is.na(cv_val)) {
      "No data"
    } else if (cv_val < 0.08) {
      "Excellent uniformity"
    } else if (cv_val < 0.15) {
      "Good uniformity"
    } else if (cv_val < 0.25) {
      "Acceptable uniformity"
    } else if (cv_val < 0.4) {
      "Poor uniformity"
    } else {
      "Very poor uniformity"
    }

    rows[[field_idx]] <- data.frame(
      field_idx = field_idx,
      cv_value = cv_val,
      morans_i = morans_i,
      uniformity_score = round(uniformity_score, 3),
      interpretation = interpretation,
      stringsAsFactors = FALSE
    )
  }

  # With zero fields, do.call(rbind, list()) would be NULL; keep the
  # original empty-but-typed data frame instead.
  if (n_fields == 0) {
    return(data.frame(
      field_idx = integer(),
      cv_value = numeric(),
      morans_i = numeric(),
      uniformity_score = numeric(),
      interpretation = character(),
      stringsAsFactors = FALSE
    ))
  }

  do.call(rbind, rows)
}
|
||||||
|
|
||||||
|
#' KPI 2: Calculate area change metric (week-over-week CI changes)
|
||||||
|
#'
|
||||||
|
#' Tracks the percentage change in CI between current and previous week
|
||||||
|
#'
|
||||||
|
#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci)
|
||||||
|
#' @param previous_stats Previous week field statistics
|
||||||
|
#'
|
||||||
|
#' @return Data frame with field-level CI changes
|
||||||
|
calculate_area_change_kpi <- function(current_stats, previous_stats) {
  # KPI 2: week-over-week CI change per field, with a text interpretation.
  #
  # Args:
  #   current_stats:  current week field statistics.
  #   previous_stats: previous week field statistics.
  #
  # Returns: data frame from calculate_change_percentages() plus an
  #          interpretation column.
  changes <- calculate_change_percentages(current_stats, previous_stats)

  # Vectorised labelling. Bins are right-closed, so e.g. exactly +5%
  # lands in "Stable" and exactly -15% in "Rapid decline", matching the
  # strict >-threshold comparisons of a per-row if/else chain.
  bin_labels <- c("Rapid decline", "Declining", "Stable",
                  "Positive growth", "Rapid growth")
  binned <- cut(changes$mean_ci_pct_change,
                breaks = c(-Inf, -15, -5, 5, 15, Inf),
                labels = bin_labels,
                right = TRUE)

  changes$interpretation <- as.character(binned)
  # Fields with no previous-week value get an explicit marker.
  changes$interpretation[is.na(changes$mean_ci_pct_change)] <- "No previous data"

  changes
}
|
||||||
|
|
||||||
|
#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare)
|
||||||
|
#'
|
||||||
|
#' Projects final harvest tonnage based on CI growth trajectory
|
||||||
|
#'
|
||||||
|
#' @param field_statistics Current field statistics
|
||||||
|
#' @param harvesting_data Historical harvest data (with yield observations)
|
||||||
|
#' @param field_boundaries_sf Field geometries
|
||||||
|
#'
|
||||||
|
#' @return Data frame with field-level TCH forecasts
|
||||||
|
calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) {
  # KPI 3: TCH (tonnes of cane per hectare) forecast per field.
  #
  # Projects tonnage from the current mean CI with a simple linear model:
  #   TCH = 50 + (CI * 10)
  # This is a simplified model; production use should include more variables.
  #
  # Args:
  #   field_statistics:    data frame with field_idx and mean_ci columns.
  #   harvesting_data:     historical harvest data (currently unused; kept
  #                        for interface stability).
  #   field_boundaries_sf: field geometries (currently unused).
  #
  # Returns: data frame with forecast, +/-15% bounds and a confidence label.
  result <- data.frame(
    field_idx = field_statistics$field_idx,
    mean_ci = field_statistics$mean_ci,
    tch_forecasted = NA_real_,
    tch_lower_bound = NA_real_,
    tch_upper_bound = NA_real_,
    confidence = NA_character_,
    stringsAsFactors = FALSE
  )

  # Improvement: vectorised over fields instead of a per-row loop; the
  # formula, rounding and labels are unchanged.
  has_ci <- !is.na(result$mean_ci)

  tch_est <- 50 + result$mean_ci[has_ci] * 10

  result$tch_forecasted[has_ci] <- round(tch_est, 2)
  # Confidence interval: fixed +/-15% band around the point estimate.
  result$tch_lower_bound[has_ci] <- round(tch_est * 0.85, 2)
  result$tch_upper_bound[has_ci] <- round(tch_est * 1.15, 2)
  result$confidence[has_ci] <- "Medium"
  result$confidence[!has_ci] <- "No data"

  result
}
|
||||||
|
|
||||||
|
#' KPI 4: Calculate growth decline indicator
|
||||||
|
#'
|
||||||
|
#' Identifies fields with negative growth trajectory
|
||||||
|
#'
|
||||||
|
#' @param ci_values_list List of CI values for each field (multiple weeks)
|
||||||
|
#'
|
||||||
|
#' @return Data frame with field-level decline indicators
|
||||||
|
calculate_growth_decline_kpi <- function(ci_values_list) {
  # KPI 4: growth decline indicator.
  # Fits a linear trend to each field's CI series and flags fields whose
  # trajectory slopes downward.
  #
  # Args:
  #   ci_values_list: list of per-field CI value vectors (one value per week).
  #
  # Returns: data frame with four_week_trend (slope), trend_interpretation
  #          and decline_severity.
  n_fields <- length(ci_values_list)

  result <- data.frame(
    field_idx = seq_len(n_fields),
    four_week_trend = NA_real_,
    trend_interpretation = NA_character_,
    decline_severity = NA_character_,
    stringsAsFactors = FALSE
  )

  # Map a fitted slope to c(interpretation, severity).
  classify_slope <- function(slope) {
    if (slope > 0.1) {
      c("Strong growth", "None")
    } else if (slope > 0) {
      c("Weak growth", "None")
    } else if (slope > -0.1) {
      c("Slight decline", "Low")
    } else if (slope > -0.3) {
      c("Moderate decline", "Medium")
    } else {
      c("Strong decline", "High")
    }
  }

  for (idx in seq_len(n_fields)) {
    observed <- ci_values_list[[idx]]
    if (!is.null(observed)) {
      observed <- observed[!is.na(observed)]
    }

    # At least two non-NA observations are needed to fit a line.
    if (is.null(observed) || length(observed) < 2) {
      result$trend_interpretation[idx] <- "Insufficient data"
      next
    }

    weeks <- seq_along(observed)
    slope <- coef(lm(observed ~ weeks))[["weeks"]]

    result$four_week_trend[idx] <- round(slope, 3)
    labels <- classify_slope(slope)
    result$trend_interpretation[idx] <- labels[1]
    result$decline_severity[idx] <- labels[2]
  }

  result
}
|
||||||
|
|
||||||
|
#' KPI 5: Calculate weed presence indicator
|
||||||
|
#'
|
||||||
|
#' Detects field fragmentation/patchiness (potential weed/pest pressure)
|
||||||
|
#'
|
||||||
|
#' @param ci_pixels_by_field List of CI pixel arrays for each field
|
||||||
|
#'
|
||||||
|
#' @return Data frame with fragmentation indicators
|
||||||
|
calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
  # KPI 5: weed/pest pressure via field fragmentation.
  # Combines within-field CI variability (CV) with the share of low-CI
  # pixels into a fragmentation index, then bins it into a risk label.
  #
  # Args:
  #   ci_pixels_by_field: list of CI pixel vectors, one per field.
  #
  # Returns: data frame with cv_value, low_ci_percent, fragmentation_index
  #          and weed_pressure_risk per field.
  n_fields <- length(ci_pixels_by_field)

  result <- data.frame(
    field_idx = seq_len(n_fields),
    cv_value = NA_real_,
    low_ci_percent = NA_real_,
    fragmentation_index = NA_real_,
    weed_pressure_risk = NA_character_,
    stringsAsFactors = FALSE
  )

  for (idx in seq_len(n_fields)) {
    pixels <- ci_pixels_by_field[[idx]]
    if (!is.null(pixels)) {
      pixels <- pixels[!is.na(pixels)]
    }

    # Nothing to assess without at least one valid pixel.
    if (is.null(pixels) || length(pixels) == 0) {
      result$weed_pressure_risk[idx] <- "No data"
      next
    }

    cv_val <- calculate_cv(pixels)
    # Share of pixels below the CI threshold 1.5.
    low_share <- sum(pixels < 1.5) / length(pixels) * 100
    # Fragmentation weighs variability by the low-CI fraction.
    frag <- cv_val * low_share / 100

    result$cv_value[idx] <- cv_val
    result$low_ci_percent[idx] <- round(low_share, 2)
    result$fragmentation_index[idx] <- round(frag, 3)

    risk <- if (is.na(frag)) {
      "No data"
    } else if (frag > 0.15) {
      "High"
    } else if (frag > 0.08) {
      "Medium"
    } else if (frag > 0.04) {
      "Low"
    } else {
      "Minimal"
    }
    result$weed_pressure_risk[idx] <- risk
  }

  result
}
|
||||||
|
|
||||||
|
#' KPI 6: Calculate gap filling quality (data interpolation success)
|
||||||
|
#'
|
||||||
|
#' Measures how well cloud/missing data was interpolated during growth model
|
||||||
|
#'
|
||||||
|
#' @param ci_rds_path Path to combined CI RDS file (before/after interpolation)
|
||||||
|
#'
|
||||||
|
#' @return Data frame with gap-filling quality metrics
|
||||||
|
calculate_gap_filling_kpi <- function(ci_rds_path) {
  # KPI 6: gap filling quality (data interpolation success).
  # Rates how much of each field's CI series was missing (NA) before
  # interpolation; gap filling itself happens in the growth-model stage.
  #
  # Args:
  #   ci_rds_path: path to a combined CI RDS file (wide fields x weeks
  #                matrix), or a directory containing an All_pivots*.rds file.
  #
  # Returns: data frame of per-field gap metrics, or NULL when unavailable.

  # Bail out on anything that is not a usable character path.
  if (is.null(ci_rds_path) || !is.character(ci_rds_path) || length(ci_rds_path) == 0) {
    return(NULL)
  }

  # A directory is resolved to its first cumulative CI file.
  if (dir.exists(ci_rds_path)) {
    candidates <- list.files(ci_rds_path, pattern = "^All_pivots.*\\.rds$", full.names = TRUE)
    if (length(candidates) == 0) {
      return(NULL)
    }
    ci_rds_path <- candidates[1]
  }

  if (!file.exists(ci_rds_path)) {
    return(NULL)
  }

  tryCatch({
    ci_data <- readRDS(ci_rds_path)

    # ci_data is a wide fields x weeks matrix; NA cells mark data that was
    # missing before interpolation. rowMeans gives the per-field NA share
    # in one pass (identical to na_count / ncol per row).
    na_pct <- unname(rowMeans(is.na(ci_data)) * 100)

    success <- ifelse(na_pct == 0, "No gaps (100% data)",
               ifelse(na_pct < 10, "Excellent",
               ifelse(na_pct < 25, "Good",
               ifelse(na_pct < 40, "Fair", "Poor"))))

    data.frame(
      field_idx = seq_len(nrow(ci_data)),
      na_percent_pre_interpolation = round(na_pct, 2),
      # Post-interpolation share is not computed here yet; kept as NA to
      # preserve the table schema.
      na_percent_post_interpolation = NA_real_,
      gap_filling_success = success,
      stringsAsFactors = FALSE
    )
  }, error = function(e) {
    message(paste("Error calculating gap filling KPI:", e$message))
    return(NULL)
  })
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# KPI ORCHESTRATOR AND REPORTING
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Create summary tables for all 6 KPIs
|
||||||
|
#'
|
||||||
|
#' @param all_kpis List containing results from all 6 KPI functions
|
||||||
|
#'
|
||||||
|
#' @return List of summary data frames ready for reporting
|
||||||
|
create_summary_tables <- function(all_kpis) {
  # Build the per-KPI summary tables used by the report writer.
  #
  # Args:
  #   all_kpis: list with results from all 6 KPI functions.
  #
  # Returns: list of summary data frames (gap_filling may be NULL when
  #          that KPI was unavailable).

  # Column projection helper; drop = FALSE keeps the data frame shape.
  keep <- function(df, cols) df[, cols, drop = FALSE]

  gap_filling_summary <- NULL
  if (!is.null(all_kpis$gap_filling)) {
    gap_filling_summary <- keep(
      all_kpis$gap_filling,
      c("field_idx", "na_percent_pre_interpolation", "gap_filling_success")
    )
  }

  list(
    uniformity = keep(
      all_kpis$uniformity,
      c("field_idx", "cv_value", "morans_i", "uniformity_score", "interpretation")
    ),
    area_change = keep(
      all_kpis$area_change,
      c("field_idx", "mean_ci_pct_change", "interpretation")
    ),
    tch_forecast = keep(
      all_kpis$tch_forecasted,
      c("field_idx", "mean_ci", "tch_forecasted", "tch_lower_bound", "tch_upper_bound", "confidence")
    ),
    growth_decline = keep(
      all_kpis$growth_decline,
      c("field_idx", "four_week_trend", "trend_interpretation", "decline_severity")
    ),
    weed_pressure = keep(
      all_kpis$weed_presence,
      c("field_idx", "fragmentation_index", "weed_pressure_risk")
    ),
    gap_filling = gap_filling_summary
  )
}
|
||||||
|
|
||||||
|
#' Create detailed field-by-field KPI report
|
||||||
|
#'
|
||||||
|
#' @param field_df Data frame with field identifiers and acreage
|
||||||
|
#' @param all_kpis List with all KPI results
|
||||||
|
#' @param field_boundaries_sf SF object with field boundaries
|
||||||
|
#'
|
||||||
|
#' @return Data frame with one row per field, all KPI columns
|
||||||
|
create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) {
  # Assemble the detailed field-by-field KPI report: one row per field,
  # one column per KPI, joined on field_idx.
  #
  # Args:
  #   field_df:            data frame with field identifiers and acreage.
  #   all_kpis:            list with all KPI results.
  #   field_boundaries_sf: sf object with field boundaries (not read here;
  #                        kept for interface stability).
  #
  # Returns: field_df augmented with one column from each KPI table.

  # Each KPI table contributes a small projection.
  kpi_projections <- list(
    all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation),
    all_kpis$area_change %>% select(field_idx, mean_ci_pct_change),
    all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted),
    all_kpis$growth_decline %>% select(field_idx, decline_severity),
    all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk)
  )

  # Fold the projections onto field_df with successive left joins.
  Reduce(
    function(acc, tbl) left_join(acc, tbl, by = "field_idx"),
    kpi_projections,
    field_df
  )
}
|
||||||
|
|
||||||
|
#' Generate KPI text interpretation for inclusion in Word report
|
||||||
|
#'
|
||||||
|
#' @param all_kpis List with all KPI results
|
||||||
|
#'
|
||||||
|
#' @return Character string with formatted KPI summary text
|
||||||
|
create_field_kpi_text <- function(all_kpis) {
  # Render the KPI summary as markdown-ish text for the Word report.
  #
  # Args:
  #   all_kpis: list with all KPI results.
  #
  # Returns: single character string with section headers and per-field
  #          labels joined by "; ".
  join <- function(values) paste(values, collapse = "; ")

  paste0(
    "## AURA KPI ANALYSIS SUMMARY\n",
    "### Field Uniformity\n",
    join(all_kpis$uniformity$interpretation), "\n",
    "### Growth Trends\n",
    join(all_kpis$growth_decline$trend_interpretation), "\n",
    "### Weed/Pest Pressure\n",
    join(all_kpis$weed_presence$weed_pressure_risk), "\n"
  )
}
|
||||||
|
|
||||||
|
#' Export detailed KPI data to Excel/RDS
|
||||||
|
#'
|
||||||
|
#' @param all_kpis List with all KPI results
|
||||||
|
#' @param kpi_summary List with summary tables
|
||||||
|
#' @param output_dir Directory for output files
|
||||||
|
#' @param week Week number
|
||||||
|
#' @param year Year
|
||||||
|
#'
|
||||||
|
#' @return List of output file paths
|
||||||
|
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
  # Persist the KPI results: one Excel workbook (summary tables) and one
  # RDS file (full objects) per week/year.
  #
  # Args:
  #   all_kpis:    list with all KPI results (saved whole to RDS).
  #   kpi_summary: list with summary tables (one Excel sheet each).
  #   output_dir:  directory for output files (created if missing).
  #   week:        week number used in the file name.
  #   year:        year used in the file name.
  #
  # Returns: list(excel = <path>, rds = <path>).
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  # Both exports share the same week/year-stamped base name.
  stem <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year))

  # One workbook, one sheet per KPI summary table.
  excel_file <- paste0(stem, ".xlsx")
  write_xlsx(
    list(
      "Uniformity" = as.data.frame(kpi_summary$uniformity),
      "Area_Change" = as.data.frame(kpi_summary$area_change),
      "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast),
      "Growth_Decline" = as.data.frame(kpi_summary$growth_decline),
      "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure),
      "Gap_Filling" = as.data.frame(kpi_summary$gap_filling)
    ),
    excel_file
  )
  message(paste("✓ AURA KPI data exported to:", excel_file))

  # RDS keeps the full (un-summarised) KPI objects for programmatic access.
  rds_file <- paste0(stem, ".rds")
  saveRDS(all_kpis, rds_file)
  message(paste("✓ AURA KPI RDS exported to:", rds_file))

  list(excel = excel_file, rds = rds_file)
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# ORCHESTRATOR FUNCTION
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Calculate all 6 AURA KPIs
|
||||||
|
#'
|
||||||
|
#' Main entry point for AURA KPI calculation.
|
||||||
|
#' This function orchestrates the 6 KPI calculations and returns all results.
|
||||||
|
#'
|
||||||
|
#' @param field_boundaries_sf SF object with field geometries
|
||||||
|
#' @param current_week ISO week number (1-53)
|
||||||
|
#' @param current_year ISO week year
|
||||||
|
#' @param current_mosaic_dir Directory containing current week's mosaic
|
||||||
|
#' @param previous_mosaic_dir Directory containing previous week's mosaic (optional)
|
||||||
|
#' @param ci_rds_path Path to combined CI RDS file
|
||||||
|
#' @param harvesting_data Data frame with harvest data (optional)
|
||||||
|
#' @param output_dir Directory for KPI exports
|
||||||
|
#'
|
||||||
|
#' @return List with results from all 6 KPI functions
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' This function:
|
||||||
|
#' 1. Loads current week mosaic and extracts field statistics
|
||||||
|
#' 2. (Optionally) loads previous week mosaic for comparison metrics
|
||||||
|
#' 3. Calculates all 6 AURA KPIs
|
||||||
|
#' 4. Creates summary tables
|
||||||
|
#' 5. Exports results to Excel/RDS
|
||||||
|
#'
|
||||||
|
calculate_all_kpis <- function(
    field_boundaries_sf,
    current_week,
    current_year,
    current_mosaic_dir,
    previous_mosaic_dir = NULL,
    ci_rds_path = NULL,
    harvesting_data = NULL,
    output_dir = NULL
) {

  message("\n============ AURA KPI CALCULATION (6 KPIs) ============")

  # Load current week mosaic -- mandatory input; everything downstream
  # derives from it.
  message("Loading current week mosaic...")
  current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)

  if (is.null(current_mosaic)) {
    stop("Could not load current week mosaic")
  }

  # Extract field statistics (per-field aggregates) and the raw per-field
  # CI pixel vectors used by the pixel-level KPIs (1, 4, 5).
  message("Extracting field statistics from current mosaic...")
  current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf)

  # Load previous week mosaic (if available) -- only needed for KPI 2.
  previous_stats <- NULL
  if (!is.null(previous_mosaic_dir)) {
    # Offset by one ISO week, handling year rollover in the helper.
    target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
    message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
    previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)

    if (!is.null(previous_mosaic)) {
      previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
    } else {
      # Missing previous data degrades gracefully to a placeholder table below.
      message("Previous week mosaic not available - skipping area change KPI")
    }
  }

  # Calculate 6 KPIs
  message("\nCalculating KPI 1: Field Uniformity...")
  uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic)

  message("Calculating KPI 2: Area Change...")
  if (!is.null(previous_stats)) {
    area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats)
  } else {
    # Placeholder table with the same schema so downstream summaries/joins
    # work unchanged.
    area_change_kpi <- data.frame(
      field_idx = seq_len(nrow(field_boundaries_sf)),
      mean_ci_pct_change = NA_real_,
      interpretation = rep("No previous data", nrow(field_boundaries_sf))
    )
  }

  message("Calculating KPI 3: TCH Forecasted...")
  tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf)

  message("Calculating KPI 4: Growth Decline...")
  # NOTE(review): only the current week's pixel lists are wrapped here, so
  # the trend is fit to a single time point per field -- presumably a
  # placeholder until a multi-week history is wired in; confirm intent.
  growth_decline_kpi <- calculate_growth_decline_kpi(
    list(ci_pixels_by_field) # Would need historical data for real trend
  )

  message("Calculating KPI 5: Weed Presence...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)

  message("Calculating KPI 6: Gap Filling...")
  # May return NULL when ci_rds_path is missing; handled by the summary step.
  gap_filling_kpi <- calculate_gap_filling_kpi(ci_rds_path)

  # Compile results into the canonical named list consumed by the
  # summary/export helpers.
  all_kpis <- list(
    uniformity = uniformity_kpi,
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    weed_presence = weed_kpi,
    gap_filling = gap_filling_kpi
  )

  # Create summary tables
  kpi_summary <- create_summary_tables(all_kpis)

  # Export to Excel + RDS in output_dir.
  export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year)

  message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year, "\n"))

  return(all_kpis)
}
|
||||||
210
r_app/80_utils_cane_supply.R
Normal file
210
r_app/80_utils_cane_supply.R
Normal file
|
|
@ -0,0 +1,210 @@
|
||||||
|
# 80_UTILS_CANE_SUPPLY.R
|
||||||
|
# ============================================================================
|
||||||
|
# CANE SUPPLY CLIENT-SPECIFIC UTILITIES (SCRIPT 80 - CLIENT TYPE: cane_supply)
|
||||||
|
#
|
||||||
|
# Contains ANGATA and other cane supply-specific KPI and reporting functions.
|
||||||
|
#
|
||||||
|
# Currently, CANE_SUPPLY clients use the common utilities from 80_utils_common.R:
|
||||||
|
# - Weekly statistics (calculate_field_statistics, calculate_kpi_trends)
|
||||||
|
# - Field analysis reporting (generate_field_analysis_summary)
|
||||||
|
# - Excel export (export_field_analysis_excel)
|
||||||
|
#
|
||||||
|
# This file is structured to accommodate future ANGATA-specific functionality such as:
|
||||||
|
# - Custom yield models
|
||||||
|
# - Harvest readiness criteria
|
||||||
|
# - Supply chain integration hooks
|
||||||
|
# - ANGATA-specific alerting and messaging
|
||||||
|
#
|
||||||
|
# Orchestrator: (Placeholder - uses common functions)
|
||||||
|
# Dependencies: 00_common_utils.R, 80_utils_common.R
|
||||||
|
# Used by: 80_calculate_kpis.R (when client_type == "cane_supply")
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
library(terra)
|
||||||
|
library(sf)
|
||||||
|
library(dplyr)
|
||||||
|
library(tidyr)
|
||||||
|
library(readxl)
|
||||||
|
library(writexl)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Placeholder: ANGATA harvest readiness assessment
|
||||||
|
#'
|
||||||
|
#' Future implementation will integrate ANGATA-specific harvest readiness criteria:
|
||||||
|
#' - Maturation phase detection (CI threshold-based)
|
||||||
|
#' - Field age tracking (days since planting)
|
||||||
|
#' - Weather-based ripeness indicators (if available)
|
||||||
|
#' - Historical yield correlations
|
||||||
|
#'
|
||||||
|
#' @param field_ci CI values for the field
|
||||||
|
#' @param field_age_days Days since planting
|
||||||
|
#'
|
||||||
|
#' @return Character string with harvest readiness assessment
|
||||||
|
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
|
||||||
|
# Placeholder implementation
|
||||||
|
# Real version would check:
|
||||||
|
# - Mean CI > 3.5 (maturation threshold)
|
||||||
|
# - Age > 350 days
|
||||||
|
# - Weekly growth rate < threshold (mature plateau)
|
||||||
|
|
||||||
|
if (is.null(field_ci) || all(is.na(field_ci))) {
|
||||||
|
return("No data available")
|
||||||
|
}
|
||||||
|
|
||||||
|
mean_ci <- mean(field_ci, na.rm = TRUE)
|
||||||
|
|
||||||
|
if (mean_ci > 3.5) {
|
||||||
|
return("Ready for harvest")
|
||||||
|
} else if (mean_ci > 2.5) {
|
||||||
|
return("Approaching harvest readiness")
|
||||||
|
} else {
|
||||||
|
return("Not ready - continue monitoring")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Placeholder: ANGATA supply chain status flags
|
||||||
|
#'
|
||||||
|
#' Future implementation will add supply chain-specific status indicators:
|
||||||
|
#' - Harvest scheduling readiness
|
||||||
|
#' - Equipment availability impact
|
||||||
|
#' - Transportation/logistics flags
|
||||||
|
#' - Quality parameter flags
|
||||||
|
#'
|
||||||
|
#' @param field_analysis Data frame with field analysis results
|
||||||
|
#'
|
||||||
|
#' @return Data frame with supply chain status columns
|
||||||
|
assess_supply_chain_status <- function(field_analysis) {
|
||||||
|
# Placeholder: return field analysis as-is
|
||||||
|
# Real version would add columns for:
|
||||||
|
# - schedule_ready (bool)
|
||||||
|
# - harvest_window_days (numeric)
|
||||||
|
# - transportation_priority (char)
|
||||||
|
# - quality_flags (char)
|
||||||
|
|
||||||
|
return(field_analysis)
|
||||||
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Orchestrate ANGATA weekly field analysis and reporting
|
||||||
|
#'
|
||||||
|
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
|
||||||
|
#' Currently uses common utilities; future versions will add client-specific logic.
|
||||||
|
#'
|
||||||
|
#' @param field_boundaries_sf SF object with field geometries
|
||||||
|
#' @param current_week ISO week number (1-53)
|
||||||
|
#' @param current_year ISO week year
|
||||||
|
#' @param mosaic_dir Directory containing weekly mosaics
|
||||||
|
#' @param field_boundaries_path Path to field GeoJSON
|
||||||
|
#' @param harvesting_data Data frame with harvest data (optional)
|
||||||
|
#' @param output_dir Directory for exports
|
||||||
|
#' @param data_dir Base data directory
|
||||||
|
#'
|
||||||
|
#' @return List with field analysis results
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' This function:
|
||||||
|
#' 1. Loads weekly mosaic and extracts field statistics
|
||||||
|
#' 2. Calculates field statistics (using common utilities)
|
||||||
|
#' 3. Prepares field analysis summary
|
||||||
|
#' 4. Exports to Excel/CSV/RDS
|
||||||
|
#' 5. (Future) Applies ANGATA-specific assessments
|
||||||
|
#'
|
||||||
|
calculate_field_analysis_cane_supply <- function(
|
||||||
|
field_boundaries_sf,
|
||||||
|
current_week,
|
||||||
|
current_year,
|
||||||
|
mosaic_dir,
|
||||||
|
field_boundaries_path = NULL,
|
||||||
|
harvesting_data = NULL,
|
||||||
|
output_dir = file.path(PROJECT_DIR, "output"),
|
||||||
|
data_dir = NULL
|
||||||
|
) {
|
||||||
|
|
||||||
|
message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")
|
||||||
|
|
||||||
|
# Load current week mosaic
|
||||||
|
message("Loading current week mosaic...")
|
||||||
|
current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)
|
||||||
|
|
||||||
|
if (is.null(current_mosaic)) {
|
||||||
|
warning(paste("Could not load current week mosaic for week", current_week, current_year))
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract field statistics
|
||||||
|
message("Extracting field statistics from current mosaic...")
|
||||||
|
field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
|
||||||
|
|
||||||
|
# Load previous week stats for comparison
|
||||||
|
message("Loading historical data for trends...")
|
||||||
|
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
|
||||||
|
previous_stats <- NULL
|
||||||
|
|
||||||
|
previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
|
||||||
|
if (!is.null(previous_mosaic)) {
|
||||||
|
previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate 4-week historical trend
|
||||||
|
message("Calculating field trends...")
|
||||||
|
ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")
|
||||||
|
|
||||||
|
field_analysis <- calculate_field_statistics(
|
||||||
|
field_stats = field_stats,
|
||||||
|
previous_stats = previous_stats,
|
||||||
|
week_num = current_week,
|
||||||
|
year = current_year,
|
||||||
|
ci_rds_path = ci_rds_path,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
harvesting_data = harvesting_data
|
||||||
|
)
|
||||||
|
|
||||||
|
if (is.null(field_analysis)) {
|
||||||
|
message("Could not generate field analysis")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generate summary
|
||||||
|
message("Generating summary statistics...")
|
||||||
|
summary_df <- generate_field_analysis_summary(field_analysis)
|
||||||
|
|
||||||
|
# Export
|
||||||
|
message("Exporting field analysis...")
|
||||||
|
export_paths <- export_field_analysis_excel(
|
||||||
|
field_analysis,
|
||||||
|
summary_df,
|
||||||
|
PROJECT_DIR,
|
||||||
|
current_week,
|
||||||
|
current_year,
|
||||||
|
output_dir
|
||||||
|
)
|
||||||
|
|
||||||
|
message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))
|
||||||
|
|
||||||
|
result <- list(
|
||||||
|
field_analysis = field_analysis,
|
||||||
|
summary = summary_df,
|
||||||
|
exports = export_paths
|
||||||
|
)
|
||||||
|
|
||||||
|
return(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# FUTURE EXTENSION POINTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Placeholder for ANGATA-specific utilities that may be added in future:
|
||||||
|
# - Custom yield models based on ANGATA historical data
|
||||||
|
# - Field condition thresholds specific to ANGATA growing practices
|
||||||
|
# - Integration with ANGATA harvest scheduling system
|
||||||
|
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
|
||||||
|
# - Cost/benefit analysis for ANGATA operational decisions
|
||||||
|
|
||||||
|
# These functions can be added here as ANGATA requirements evolve.
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -3,7 +3,7 @@ params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: CI_report.docx
|
output_file: CI_report.docx
|
||||||
report_date: "2026-01-22"
|
report_date: "2026-01-22"
|
||||||
data_dir: "aura"
|
data_dir: "angata"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
||||||
|
|
@ -38,20 +38,26 @@ reporting_script <- TRUE
|
||||||
|
|
||||||
# Load all packages at once with suppressPackageStartupMessages
|
# Load all packages at once with suppressPackageStartupMessages
|
||||||
suppressPackageStartupMessages({
|
suppressPackageStartupMessages({
|
||||||
library(here)
|
# File path handling
|
||||||
library(sf)
|
library(here) # For relative path resolution (platform-independent file paths)
|
||||||
library(terra)
|
|
||||||
library(tidyverse)
|
# Spatial data handling
|
||||||
library(tmap)
|
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||||||
library(lubridate)
|
library(terra) # For raster operations (reading mosaic TIFFs for visualization)
|
||||||
library(zoo)
|
|
||||||
library(rsample)
|
# Data manipulation
|
||||||
library(caret)
|
library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization)
|
||||||
library(randomForest)
|
library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion)
|
||||||
library(CAST)
|
library(lubridate) # For date/time operations (week extraction, date formatting)
|
||||||
library(knitr)
|
library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling)
|
||||||
library(tidyr)
|
|
||||||
library(flextable)
|
# Visualization
|
||||||
|
library(tmap) # For interactive maps (field boundary visualization)
|
||||||
|
|
||||||
|
# Reporting
|
||||||
|
library(knitr) # For R Markdown document generation (code execution and output)
|
||||||
|
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||||
|
library(officer) # For Word document manipulation (custom formatting, headers, footers)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Load custom utility functions
|
# Load custom utility functions
|
||||||
|
|
@ -101,10 +107,14 @@ project_dir <- params$data_dir
|
||||||
# Source project parameters with error handling
|
# Source project parameters with error handling
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here::here("r_app", "parameters_project.R"))
|
source(here::here("r_app", "parameters_project.R"))
|
||||||
|
source(here::here("r_app", "00_common_utils.R"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading parameters_project.R: ", e$message)
|
stop("Error loading project utilities: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Load centralized paths
|
||||||
|
paths <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
# Log initial configuration
|
# Log initial configuration
|
||||||
safe_log("Starting the R Markdown script with KPIs")
|
safe_log("Starting the R Markdown script with KPIs")
|
||||||
safe_log(paste("mail_day params:", params$mail_day))
|
safe_log(paste("mail_day params:", params$mail_day))
|
||||||
|
|
@ -114,15 +124,14 @@ safe_log(paste("mail_day variable:", mail_day))
|
||||||
|
|
||||||
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
|
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||||
# Primary expected directory inside the laravel storage
|
# Primary expected directory from centralized paths
|
||||||
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
kpi_data_dir <- paths$kpi_reports_dir
|
||||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||||
|
|
||||||
# Calculate current week from report_date using ISO 8601 week numbering
|
# Calculate current week from report_date using ISO 8601 week numbering
|
||||||
report_date_obj <- as.Date(report_date)
|
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
||||||
current_week <- lubridate::isoweek(report_date_obj)
|
current_year <- as.numeric(format(as.Date(report_date), "%G"))
|
||||||
current_iso_year <- lubridate::isoyear(report_date_obj)
|
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year)
|
|
||||||
|
|
||||||
# Candidate filenames we expect (exact and common variants)
|
# Candidate filenames we expect (exact and common variants)
|
||||||
expected_summary_names <- c(
|
expected_summary_names <- c(
|
||||||
|
|
@ -346,50 +355,17 @@ safe_log(paste("Week range:", week_start, "to", week_end))
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||||
# Load CI index data with error handling
|
# Load CI quadrant data for field-level analysis
|
||||||
tryCatch({
|
tryCatch({
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||||
|
|
||||||
safe_log("Successfully loaded CI quadrant data")
|
safe_log("Successfully loaded CI quadrant data")
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading CI quadrant data: ", e$message)
|
stop("Error loading CI quadrant data: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Get file paths for different weeks using the utility function
|
# NOTE: Overview maps skipped for this report
|
||||||
tryCatch({
|
# Individual field sections load their own per-field mosaics directly
|
||||||
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
|
```
|
||||||
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
|
|
||||||
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
|
|
||||||
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
|
|
||||||
|
|
||||||
# Log the calculated paths
|
|
||||||
safe_log("Required mosaic paths:")
|
|
||||||
safe_log(paste("Path to current week:", path_to_week_current))
|
|
||||||
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
|
|
||||||
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
|
|
||||||
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
|
|
||||||
|
|
||||||
# Validate that files exist
|
|
||||||
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
|
|
||||||
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
|
|
||||||
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
|
|
||||||
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
|
|
||||||
|
|
||||||
# Load raster data with terra functions
|
|
||||||
CI <- terra::rast(path_to_week_current)$CI
|
|
||||||
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
|
|
||||||
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
|
|
||||||
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
|
|
||||||
|
|
||||||
# DEBUG: Check which weeks were actually loaded and their data ranges
|
|
||||||
safe_log(paste("DEBUG - CI (current) range:", paste(terra::minmax(CI)[,1], collapse=" to ")))
|
|
||||||
safe_log(paste("DEBUG - CI_m1 (week-1) range:", paste(terra::minmax(CI_m1)[,1], collapse=" to ")))
|
|
||||||
safe_log(paste("DEBUG - CI_m2 (week-2) range:", paste(terra::minmax(CI_m2)[,1], collapse=" to ")))
|
|
||||||
safe_log(paste("DEBUG - CI_m3 (week-3) range:", paste(terra::minmax(CI_m3)[,1], collapse=" to ")))
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Error loading raster data: ", e$message)
|
|
||||||
})
|
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r compute_benchmarks_once, include=FALSE}
|
```{r compute_benchmarks_once, include=FALSE}
|
||||||
|
|
@ -450,7 +426,7 @@ if (exists("summary_tables") && !is.null(summary_tables)) {
|
||||||
|
|
||||||
## Executive Summary - Key Performance Indicators
|
## Executive Summary - Key Performance Indicators
|
||||||
|
|
||||||
```{r combined_kpi_table, echo=FALSE}
|
```{r combined_kpi_table, echo=FALSE, results='asis'}
|
||||||
# Combine all KPI tables into a single table with standardized column names
|
# Combine all KPI tables into a single table with standardized column names
|
||||||
display_names <- c(
|
display_names <- c(
|
||||||
field_uniformity_summary = "Field Uniformity",
|
field_uniformity_summary = "Field Uniformity",
|
||||||
|
|
@ -504,7 +480,7 @@ ft
|
||||||
|
|
||||||
## Field Alerts
|
## Field Alerts
|
||||||
|
|
||||||
```{r field_alerts_table, echo=FALSE}
|
```{r field_alerts_table, echo=FALSE, results='asis'}
|
||||||
# Generate alerts for all fields
|
# Generate alerts for all fields
|
||||||
generate_field_alerts <- function(field_details_table) {
|
generate_field_alerts <- function(field_details_table) {
|
||||||
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
|
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
|
||||||
|
|
@ -610,42 +586,13 @@ if (exists("field_details_table") && !is.null(field_details_table)) {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
||||||
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
|
# Verify CI quadrant data is loaded from load_ci_data chunk
|
||||||
# Calculate difference rasters for comparisons
|
if (!exists("CI_quadrant") || is.null(CI_quadrant)) {
|
||||||
# When one week has NA values, the difference will also be NA (not zero)
|
stop("CI_quadrant data not available - check load_ci_data chunk")
|
||||||
# Initialize placeholders first to ensure they exist
|
|
||||||
last_week_dif_raster_abs <- NULL
|
|
||||||
three_week_dif_raster_abs <- NULL
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
# Always calculate differences - NA values will propagate naturally
|
|
||||||
# This way empty weeks (all NA) result in NA differences, not misleading zeros
|
|
||||||
last_week_dif_raster_abs <- (CI - CI_m1)
|
|
||||||
three_week_dif_raster_abs <- (CI - CI_m3)
|
|
||||||
|
|
||||||
safe_log("Calculated difference rasters (NA values preserved)")
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
|
|
||||||
# Fallback: create NA placeholders if calculation fails
|
|
||||||
if (is.null(last_week_dif_raster_abs)) {
|
|
||||||
last_week_dif_raster_abs <- CI * NA
|
|
||||||
}
|
|
||||||
if (is.null(three_week_dif_raster_abs)) {
|
|
||||||
three_week_dif_raster_abs <- CI * NA
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
# Final safety check - ensure variables exist in global environment
|
|
||||||
if (is.null(last_week_dif_raster_abs)) {
|
|
||||||
last_week_dif_raster_abs <- CI * NA
|
|
||||||
safe_log("Created NA placeholder for last_week_dif_raster_abs", "WARNING")
|
|
||||||
}
|
|
||||||
if (is.null(three_week_dif_raster_abs)) {
|
|
||||||
three_week_dif_raster_abs <- CI * NA
|
|
||||||
safe_log("Created NA placeholder for three_week_dif_raster_abs", "WARNING")
|
|
||||||
}
|
}
|
||||||
|
safe_log("CI quadrant data verified for field-level analysis")
|
||||||
|
```
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
|
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
|
||||||
|
|
@ -665,76 +612,6 @@ tryCatch({
|
||||||
stop("Error loading field boundaries: ", e$message)
|
stop("Error loading field boundaries: ", e$message)
|
||||||
})
|
})
|
||||||
```
|
```
|
||||||
\newpage
|
|
||||||
|
|
||||||
## Chlorophyll Index (CI) Overview Map - Current Week
|
|
||||||
```{r render_ci_overview_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE}
|
|
||||||
# Create overview chlorophyll index map
|
|
||||||
tryCatch({
|
|
||||||
# Choose palette based on colorblind_friendly parameter
|
|
||||||
ci_palette <- if (colorblind_friendly) "viridis" else "brewer.rd_yl_gn"
|
|
||||||
|
|
||||||
# Base shape
|
|
||||||
map <- tmap::tm_shape(CI, unit = "m")
|
|
||||||
|
|
||||||
# Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison)
|
|
||||||
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = ci_palette,
|
|
||||||
limits = c(1, 8)),
|
|
||||||
col.legend = tm_legend(title = "Chlorophyll Index (CI)",
|
|
||||||
orientation = "landscape",
|
|
||||||
position = tm_pos_out("center", "bottom")))
|
|
||||||
# Complete the map with layout and other elements
|
|
||||||
map <- map +
|
|
||||||
tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
|
||||||
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
|
||||||
tmap::tm_shape(AllPivots0) +
|
|
||||||
tmap::tm_borders(col = "black") +
|
|
||||||
tmap::tm_text("sub_field", size = 0.6, col = "black")
|
|
||||||
|
|
||||||
# Print the map
|
|
||||||
print(map)
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Error creating CI overview map:", e$message), "ERROR")
|
|
||||||
plot(1, type="n", axes=FALSE, xlab="", ylab="")
|
|
||||||
text(1, 1, "Error creating CI overview map", cex=1.5)
|
|
||||||
})
|
|
||||||
|
|
||||||
```
|
|
||||||
\newpage
|
|
||||||
|
|
||||||
## Weekly Chlorophyll Index Difference Map
|
|
||||||
```{r render_ci_difference_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE}
|
|
||||||
# Create chlorophyll index difference map
|
|
||||||
tryCatch({
|
|
||||||
# Choose palette based on colorblind_friendly parameter
|
|
||||||
diff_palette <- if (colorblind_friendly) "plasma" else "brewer.rd_yl_gn"
|
|
||||||
|
|
||||||
# Base shape
|
|
||||||
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m")
|
|
||||||
|
|
||||||
# Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale)
|
|
||||||
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = diff_palette,
|
|
||||||
midpoint = 0,
|
|
||||||
limits = c(-3, 3)),
|
|
||||||
col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
|
|
||||||
orientation = "landscape",
|
|
||||||
position = tm_pos_out("center", "bottom")))
|
|
||||||
# Complete the map with layout and other elements
|
|
||||||
map <- map +
|
|
||||||
tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
|
||||||
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
|
||||||
tmap::tm_shape(AllPivots0) +
|
|
||||||
tmap::tm_borders(col = "black") +
|
|
||||||
tmap::tm_text("sub_field", size = 0.6, col = "black")
|
|
||||||
|
|
||||||
# Print the map
|
|
||||||
print(map)
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Error creating CI difference map:", e$message), "ERROR")
|
|
||||||
plot(1, type="n", axes=FALSE, xlab="", ylab="")
|
|
||||||
text(1, 1, "Error creating CI difference map", cex=1.5)
|
|
||||||
})
|
|
||||||
```
|
|
||||||
|
|
||||||
# Section 2: Field-by-Field Analysis
|
# Section 2: Field-by-Field Analysis
|
||||||
|
|
||||||
|
|
@ -760,6 +637,23 @@ tryCatch({
|
||||||
dplyr::group_by(field) %>%
|
dplyr::group_by(field) %>%
|
||||||
dplyr::summarise(.groups = 'drop')
|
dplyr::summarise(.groups = 'drop')
|
||||||
|
|
||||||
|
# Use per-field weekly mosaic directory path from parameters_project.R
|
||||||
|
weekly_mosaic_per_field_dir <- weekly_CI_mosaic
|
||||||
|
|
||||||
|
# Helper to get week/year from a date
|
||||||
|
get_week_year <- function(date) {
|
||||||
|
list(
|
||||||
|
week = as.numeric(format(date, "%V")),
|
||||||
|
year = as.numeric(format(date, "%G"))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get week/year for current and historical weeks (local to field section)
|
||||||
|
current_ww <- get_week_year(as.Date(today))
|
||||||
|
minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1))
|
||||||
|
minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2))
|
||||||
|
minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3))
|
||||||
|
|
||||||
# Generate plots for each field
|
# Generate plots for each field
|
||||||
for(i in seq_along(AllPivots_merged$field)) {
|
for(i in seq_along(AllPivots_merged$field)) {
|
||||||
field_name <- AllPivots_merged$field[i]
|
field_name <- AllPivots_merged$field[i]
|
||||||
|
|
@ -775,15 +669,71 @@ tryCatch({
|
||||||
cat("\\newpage\n\n")
|
cat("\\newpage\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
# Call ci_plot with explicit parameters (ci_plot will generate its own header)
|
# Load per-field mosaics directly for this field
|
||||||
|
field_CI <- NULL
|
||||||
|
field_CI_m1 <- NULL
|
||||||
|
field_CI_m2 <- NULL
|
||||||
|
field_CI_m3 <- NULL
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Load per-field mosaic for current week
|
||||||
|
per_field_path_current <- get_per_field_mosaic_path(
|
||||||
|
weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year
|
||||||
|
)
|
||||||
|
if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) {
|
||||||
|
field_CI <- terra::rast(per_field_path_current)[["CI"]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Load per-field mosaic for week-1
|
||||||
|
per_field_path_m1 <- get_per_field_mosaic_path(
|
||||||
|
weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year
|
||||||
|
)
|
||||||
|
if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) {
|
||||||
|
field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Load per-field mosaic for week-2
|
||||||
|
per_field_path_m2 <- get_per_field_mosaic_path(
|
||||||
|
weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year
|
||||||
|
)
|
||||||
|
if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) {
|
||||||
|
field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Load per-field mosaic for week-3
|
||||||
|
per_field_path_m3 <- get_per_field_mosaic_path(
|
||||||
|
weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year
|
||||||
|
)
|
||||||
|
if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) {
|
||||||
|
field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]]
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG")
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING")
|
||||||
|
})
|
||||||
|
|
||||||
|
# Calculate difference rasters from per-field data (local to this field)
|
||||||
|
last_week_dif_raster_field <- NULL
|
||||||
|
three_week_dif_raster_field <- NULL
|
||||||
|
|
||||||
|
if (!is.null(field_CI) && !is.null(field_CI_m1)) {
|
||||||
|
last_week_dif_raster_field <- field_CI - field_CI_m1
|
||||||
|
}
|
||||||
|
if (!is.null(field_CI) && !is.null(field_CI_m3)) {
|
||||||
|
three_week_dif_raster_field <- field_CI - field_CI_m3
|
||||||
|
}
|
||||||
|
|
||||||
|
# Call ci_plot with field-specific rasters
|
||||||
ci_plot(
|
ci_plot(
|
||||||
pivotName = field_name,
|
pivotName = field_name,
|
||||||
field_boundaries = AllPivots0,
|
field_boundaries = AllPivots0,
|
||||||
current_ci = CI,
|
current_ci = field_CI,
|
||||||
ci_minus_1 = CI_m1,
|
ci_minus_1 = field_CI_m1,
|
||||||
ci_minus_2 = CI_m2,
|
ci_minus_2 = field_CI_m2,
|
||||||
last_week_diff = last_week_dif_raster_abs,
|
last_week_diff = last_week_dif_raster_field,
|
||||||
three_week_diff = three_week_dif_raster_abs,
|
three_week_diff = three_week_dif_raster_field,
|
||||||
harvesting_data = harvesting_data,
|
harvesting_data = harvesting_data,
|
||||||
week = week,
|
week = week,
|
||||||
week_minus_1 = week_minus_1,
|
week_minus_1 = week_minus_1,
|
||||||
|
|
@ -882,9 +832,9 @@ tryCatch({
|
||||||
|
|
||||||
The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis.
|
The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis.
|
||||||
|
|
||||||
```{r detailed_field_table, echo=FALSE}
|
```{r detailed_field_table, echo=FALSE, results='asis'}
|
||||||
# Load CI quadrant data to get field ages
|
# Load CI quadrant data to get field ages
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||||
|
|
||||||
# Identify the current season for each field based on report_date
|
# Identify the current season for each field based on report_date
|
||||||
# The current season is the one where the report_date falls within or shortly after the season
|
# The current season is the one where the report_date falls within or shortly after the season
|
||||||
|
|
@ -1027,7 +977,7 @@ Comparing the current season to these lines helps assess whether crop growth is
|
||||||
\newpage
|
\newpage
|
||||||
## Report Metadata
|
## Report Metadata
|
||||||
|
|
||||||
```{r report_metadata, echo=FALSE}
|
```{r report_metadata, echo=FALSE, results='asis'}
|
||||||
metadata_info <- data.frame(
|
metadata_info <- data.frame(
|
||||||
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
|
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
|
||||||
Value = c(
|
Value = c(
|
||||||
|
|
|
||||||
|
|
@ -1,584 +0,0 @@
|
||||||
---
|
|
||||||
params:
|
|
||||||
ref: "word-styles-reference-var1.docx"
|
|
||||||
output_file: CI_report.docx
|
|
||||||
report_date: "2025-09-30"
|
|
||||||
data_dir: "aura"
|
|
||||||
mail_day: "Wednesday"
|
|
||||||
borders: FALSE
|
|
||||||
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
|
||||||
colorblind_friendly: TRUE # use colorblind-friendly palettes (viridis/plasma)
|
|
||||||
facet_by_season: FALSE # facet CI trend plots by season instead of overlaying
|
|
||||||
x_axis_unit: "days" # x-axis unit for trend plots: "days" or "weeks"
|
|
||||||
output:
|
|
||||||
word_document:
|
|
||||||
reference_docx: !expr file.path("word-styles-reference-var1.docx")
|
|
||||||
toc: no
|
|
||||||
editor_options:
|
|
||||||
chunk_output_type: console
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r setup_parameters, include=FALSE}
|
|
||||||
# Set up basic report parameters from input values
|
|
||||||
report_date <- params$report_date
|
|
||||||
mail_day <- params$mail_day
|
|
||||||
borders <- params$borders
|
|
||||||
ci_plot_type <- params$ci_plot_type
|
|
||||||
colorblind_friendly <- params$colorblind_friendly
|
|
||||||
facet_by_season <- params$facet_by_season
|
|
||||||
x_axis_unit <- params$x_axis_unit
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
|
|
||||||
# Configure knitr options
|
|
||||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
|
||||||
|
|
||||||
# Set flag for reporting scripts to use pivot.geojson instead of pivot_2.geojson
|
|
||||||
reporting_script <- TRUE
|
|
||||||
|
|
||||||
# Load all packages at once with suppressPackageStartupMessages
|
|
||||||
suppressPackageStartupMessages({
|
|
||||||
library(here)
|
|
||||||
library(sf)
|
|
||||||
library(terra)
|
|
||||||
library(tidyverse)
|
|
||||||
library(tmap)
|
|
||||||
library(lubridate)
|
|
||||||
library(zoo)
|
|
||||||
library(rsample)
|
|
||||||
library(caret)
|
|
||||||
library(randomForest)
|
|
||||||
library(CAST)
|
|
||||||
library(knitr)
|
|
||||||
library(tidyr)
|
|
||||||
library(flextable)
|
|
||||||
library(officer)
|
|
||||||
})
|
|
||||||
|
|
||||||
# Load custom utility functions
|
|
||||||
tryCatch({
|
|
||||||
source("report_utils.R")
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste("Error loading report_utils.R:", e$message))
|
|
||||||
# Try alternative path if the first one fails
|
|
||||||
tryCatch({
|
|
||||||
source(here::here("r_app", "report_utils.R"))
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Could not load report_utils.R from either location: ", e$message)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
# Function to determine field priority level based on CV and Moran's I
|
|
||||||
# Returns: 1=Urgent, 2=Monitor, 3=No stress
|
|
||||||
get_field_priority_level <- function(cv, morans_i) {
|
|
||||||
# Handle NA values
|
|
||||||
if (is.na(cv) || is.na(morans_i)) return(3) # Default to no stress
|
|
||||||
|
|
||||||
# Determine priority based on thresholds
|
|
||||||
if (cv < 0.1) {
|
|
||||||
if (morans_i < 0.7) {
|
|
||||||
return(3) # No stress
|
|
||||||
} else if (morans_i <= 0.9) {
|
|
||||||
return(2) # Monitor (young field with some clustering)
|
|
||||||
} else {
|
|
||||||
return(1) # Urgent
|
|
||||||
}
|
|
||||||
} else if (cv <= 0.15) {
|
|
||||||
if (morans_i < 0.7) {
|
|
||||||
return(2) # Monitor
|
|
||||||
} else {
|
|
||||||
return(1) # Urgent
|
|
||||||
}
|
|
||||||
} else { # cv > 0.15
|
|
||||||
return(1) # Urgent
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
|
|
||||||
# Set the project directory from parameters
|
|
||||||
project_dir <- params$data_dir
|
|
||||||
|
|
||||||
# Source project parameters with error handling
|
|
||||||
tryCatch({
|
|
||||||
source(here::here("r_app", "parameters_project.R"))
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Error loading parameters_project.R: ", e$message)
|
|
||||||
})
|
|
||||||
|
|
||||||
# Log initial configuration
|
|
||||||
safe_log("Starting the R Markdown script with KPIs - NO TABLES VERSION")
|
|
||||||
safe_log(paste("mail_day params:", params$mail_day))
|
|
||||||
safe_log(paste("report_date params:", params$report_date))
|
|
||||||
safe_log(paste("mail_day variable:", mail_day))
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
|
|
||||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
|
||||||
# Primary expected directory inside the laravel storage
|
|
||||||
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
|
||||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
|
||||||
|
|
||||||
# Calculate current week from report_date using ISO 8601 week numbering
|
|
||||||
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
|
||||||
current_year <- as.numeric(format(as.Date(report_date), "%G"))
|
|
||||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
|
||||||
|
|
||||||
# Candidate filenames we expect (exact and common variants)
|
|
||||||
expected_summary_names <- c(
|
|
||||||
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
|
||||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
|
||||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
|
||||||
"kpi_summary_tables.rds",
|
|
||||||
paste0("kpi_summary_tables_", week_suffix, ".rds"),
|
|
||||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
|
||||||
)
|
|
||||||
|
|
||||||
expected_field_details_names <- c(
|
|
||||||
paste0(project_dir, "_field_details_", week_suffix, ".rds"),
|
|
||||||
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
|
|
||||||
paste0(project_dir, "_field_details.rds"),
|
|
||||||
"field_details.rds"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
|
|
||||||
try_load_from_dir <- function(dir, candidates) {
|
|
||||||
if (!dir.exists(dir)) return(NULL)
|
|
||||||
for (name in candidates) {
|
|
||||||
f <- file.path(dir, name)
|
|
||||||
if (file.exists(f)) return(f)
|
|
||||||
}
|
|
||||||
return(NULL)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Try primary directory first
|
|
||||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
|
||||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
|
||||||
|
|
||||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
|
||||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
|
||||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
|
||||||
# List rds files under laravel_app/storage/app recursively
|
|
||||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
|
||||||
# Try to match by expected names
|
|
||||||
if (is.null(summary_file)) {
|
|
||||||
matched <- files[basename(files) %in% expected_summary_names]
|
|
||||||
if (length(matched) > 0) summary_file <- matched[1]
|
|
||||||
}
|
|
||||||
if (is.null(field_details_file)) {
|
|
||||||
matched2 <- files[basename(files) %in% expected_field_details_names]
|
|
||||||
if (length(matched2) > 0) field_details_file <- matched2[1]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Final checks and load with safe error messages
|
|
||||||
kpi_files_exist <- FALSE
|
|
||||||
if (!is.null(summary_file) && file.exists(summary_file)) {
|
|
||||||
safe_log(paste("Loading KPI summary from:", summary_file))
|
|
||||||
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
|
|
||||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
|
||||||
} else {
|
|
||||||
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!is.null(field_details_file) && file.exists(field_details_file)) {
|
|
||||||
safe_log(paste("Loading field details from:", field_details_file))
|
|
||||||
field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL })
|
|
||||||
if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE
|
|
||||||
} else {
|
|
||||||
safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (kpi_files_exist) {
|
|
||||||
safe_log("✓ KPI summary tables loaded successfully")
|
|
||||||
} else {
|
|
||||||
safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Generate field-specific KPI summary for display in reports
|
|
||||||
#' @param field_name Name of the field to summarize
|
|
||||||
#' @param field_details_table Data frame with field-level KPI details
|
|
||||||
#' @return Formatted text string with field KPI summary
|
|
||||||
generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadrant) {
|
|
||||||
tryCatch({
|
|
||||||
# Get field age from CI quadrant data for the CURRENT SEASON only
|
|
||||||
# First identify the current season for this field
|
|
||||||
current_season <- CI_quadrant %>%
|
|
||||||
filter(field == field_name, Date <= as.Date(report_date)) %>%
|
|
||||||
group_by(season) %>%
|
|
||||||
summarise(season_end = max(Date), .groups = 'drop') %>%
|
|
||||||
filter(season == max(season)) %>%
|
|
||||||
pull(season)
|
|
||||||
|
|
||||||
# Get the most recent DOY from the current season
|
|
||||||
field_age <- CI_quadrant %>%
|
|
||||||
filter(field == field_name, season == current_season) %>%
|
|
||||||
pull(DOY) %>%
|
|
||||||
max(na.rm = TRUE)
|
|
||||||
|
|
||||||
# Filter data for this specific field
|
|
||||||
field_data <- field_details_table %>%
|
|
||||||
filter(Field == field_name)
|
|
||||||
|
|
||||||
if (nrow(field_data) == 0) {
|
|
||||||
return(paste("**Field", field_name, "KPIs:** Data not available"))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Aggregate sub-field data for field-level summary
|
|
||||||
# For categorical data, take the most common value or highest risk level
|
|
||||||
field_summary <- field_data %>%
|
|
||||||
summarise(
|
|
||||||
field_size = sum(`Field Size (ha)`, na.rm = TRUE),
|
|
||||||
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
|
|
||||||
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
|
|
||||||
max_gap_score = max(`Gap Score`, na.rm = TRUE),
|
|
||||||
highest_decline_risk = case_when(
|
|
||||||
any(`Decline Risk` == "Very-high") ~ "Very-high",
|
|
||||||
any(`Decline Risk` == "High") ~ "High",
|
|
||||||
any(`Decline Risk` == "Moderate") ~ "Moderate",
|
|
||||||
any(`Decline Risk` == "Low") ~ "Low",
|
|
||||||
TRUE ~ "Unknown"
|
|
||||||
),
|
|
||||||
highest_weed_risk = case_when(
|
|
||||||
any(`Weed Risk` == "High") ~ "High",
|
|
||||||
any(`Weed Risk` == "Moderate") ~ "Moderate",
|
|
||||||
any(`Weed Risk` == "Low") ~ "Low",
|
|
||||||
TRUE ~ "Unknown"
|
|
||||||
),
|
|
||||||
avg_mean_ci = mean(`Mean CI`, na.rm = TRUE),
|
|
||||||
avg_cv = mean(`CV Value`, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply age-based filtering to yield forecast
|
|
||||||
if (is.na(field_age) || field_age < 240) {
|
|
||||||
field_summary$avg_yield_forecast <- NA_real_
|
|
||||||
}
|
|
||||||
|
|
||||||
# Format the summary text
|
|
||||||
yield_text <- if (is.na(field_summary$avg_yield_forecast)) {
|
|
||||||
"Yield Forecast: NA"
|
|
||||||
} else {
|
|
||||||
paste0("Yield Forecast: ", round(field_summary$avg_yield_forecast, 1), " t/ha")
|
|
||||||
}
|
|
||||||
|
|
||||||
kpi_text <- paste0(
|
|
||||||
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
|
|
||||||
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
|
|
||||||
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
|
|
||||||
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
|
|
||||||
)
|
|
||||||
|
|
||||||
kpi_text <- paste0("<span style='font-size:10pt'>", kpi_text, "</span>")
|
|
||||||
|
|
||||||
return(kpi_text)
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Error generating KPI summary for field", field_name, ":", e$message), "ERROR")
|
|
||||||
return(paste("**Field", field_name, "KPIs:** Error generating summary"))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
|
|
||||||
# Set locale for consistent date formatting
|
|
||||||
Sys.setlocale("LC_TIME", "C")
|
|
||||||
|
|
||||||
# Initialize date variables from parameters
|
|
||||||
today <- as.character(report_date)
|
|
||||||
mail_day_as_character <- as.character(mail_day)
|
|
||||||
|
|
||||||
# Calculate report dates and weeks using ISO 8601 week numbering
|
|
||||||
report_date_obj <- as.Date(today)
|
|
||||||
current_week <- as.numeric(format(report_date_obj, "%V"))
|
|
||||||
year <- as.numeric(format(report_date_obj, "%Y"))
|
|
||||||
|
|
||||||
# Calculate dates for weekly analysis
|
|
||||||
week_start <- report_date_obj - ((as.numeric(format(report_date_obj, "%w")) + 1) %% 7)
|
|
||||||
week_end <- week_start + 6
|
|
||||||
|
|
||||||
# Calculate week days (copied from 05 script for compatibility)
|
|
||||||
report_date_as_week_day <- weekdays(lubridate::ymd(today))
|
|
||||||
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
|
|
||||||
|
|
||||||
# Calculate initial week number
|
|
||||||
week <- lubridate::week(today)
|
|
||||||
safe_log(paste("Initial week calculation:", week, "today:", today))
|
|
||||||
|
|
||||||
# Calculate previous dates for comparisons
|
|
||||||
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
|
|
||||||
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
|
|
||||||
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
|
|
||||||
|
|
||||||
# Adjust week calculation based on mail day
|
|
||||||
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
|
|
||||||
safe_log("Adjusting weeks because of mail day")
|
|
||||||
week <- lubridate::week(today) + 1
|
|
||||||
today_minus_1 <- as.character(lubridate::ymd(today))
|
|
||||||
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
|
|
||||||
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Calculate week numbers for previous weeks
|
|
||||||
week_minus_1 <- week - 1
|
|
||||||
week_minus_2 <- week - 2
|
|
||||||
week_minus_3 <- week - 3
|
|
||||||
|
|
||||||
# Format current week with leading zeros
|
|
||||||
week <- sprintf("%02d", week)
|
|
||||||
|
|
||||||
safe_log(paste("Report week:", current_week, "Year:", year))
|
|
||||||
safe_log(paste("Week range:", week_start, "to", week_end))
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
|
||||||
# Load CI quadrant data for field-level analysis
|
|
||||||
tryCatch({
|
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
|
||||||
safe_log("Successfully loaded CI quadrant data")
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Error loading CI quadrant data: ", e$message)
|
|
||||||
})
|
|
||||||
|
|
||||||
# NOTE: Overview maps skipped for this report
|
|
||||||
# Individual field sections load their own per-field mosaics directly
|
|
||||||
```
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r compute_benchmarks_once, include=FALSE}
|
|
||||||
# Compute CI benchmarks once for the entire estate
|
|
||||||
benchmarks <- compute_ci_benchmarks(CI_quadrant, project_dir, c(10, 50, 90))
|
|
||||||
if (!is.null(benchmarks)) {
|
|
||||||
safe_log("Benchmarks computed successfully for the report")
|
|
||||||
} else {
|
|
||||||
safe_log("Failed to compute benchmarks", "WARNING")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Report Summary
|
|
||||||
|
|
||||||
**Farm Location:** `r toupper(project_dir)` Estate
|
|
||||||
**Report Period:** Week `r current_week` of `r year`
|
|
||||||
**Data Source:** Planet Labs Satellite Imagery
|
|
||||||
**Analysis Type:** Chlorophyll Index (CI) Monitoring
|
|
||||||
**Report Generated on:** `r format(Sys.time(), "%B %d, %Y at %H:%M")`
|
|
||||||
|
|
||||||
**NOTE: THIS IS A NO-TABLES VERSION FOR DIAGNOSTIC PURPOSES - MAPS AND GRAPHS ONLY**
|
|
||||||
|
|
||||||
\newpage
|
|
||||||
|
|
||||||
# Section 2: Field-by-Field Analysis
|
|
||||||
|
|
||||||
## Overview of Field-Level Insights
|
|
||||||
This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions.
|
|
||||||
|
|
||||||
**Key Elements per Field:**
|
|
||||||
- Current and historical CI maps
|
|
||||||
- Week-over-week change visualizations
|
|
||||||
- Cumulative growth trends
|
|
||||||
- Field-specific KPI summaries
|
|
||||||
|
|
||||||
*Navigate to the following pages for individual field reports.*
|
|
||||||
|
|
||||||
\newpage
|
|
||||||
|
|
||||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
|
|
||||||
# Load field boundaries from parameters
|
|
||||||
tryCatch({
|
|
||||||
AllPivots0 <- field_boundaries_sf %>%
|
|
||||||
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
|
|
||||||
safe_log("Successfully loaded field boundaries")
|
|
||||||
|
|
||||||
# Prepare merged field list for use in summaries
|
|
||||||
AllPivots_merged <- AllPivots0 %>%
|
|
||||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
|
|
||||||
dplyr::group_by(field) %>%
|
|
||||||
dplyr::summarise(.groups = 'drop')
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Error loading field boundaries: ", e$message)
|
|
||||||
})
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, dpi=300, dev='png', message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
|
|
||||||
# Generate detailed visualizations for each field
|
|
||||||
tryCatch({
|
|
||||||
# Merge field polygons for processing and filter out NA field names
|
|
||||||
AllPivots_merged <- AllPivots0 %>%
|
|
||||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA fields
|
|
||||||
dplyr::group_by(field) %>%
|
|
||||||
dplyr::summarise(.groups = 'drop')
|
|
||||||
|
|
||||||
# Use per-field weekly mosaic directory path from parameters_project.R
|
|
||||||
weekly_mosaic_per_field_dir <- weekly_CI_mosaic
|
|
||||||
|
|
||||||
# Helper to get week/year from a date
|
|
||||||
get_week_year <- function(date) {
|
|
||||||
list(
|
|
||||||
week = as.numeric(format(date, "%V")),
|
|
||||||
year = as.numeric(format(date, "%G"))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get week/year for current and historical weeks (local to field section)
|
|
||||||
current_ww <- get_week_year(as.Date(today))
|
|
||||||
minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1))
|
|
||||||
minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2))
|
|
||||||
minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3))
|
|
||||||
|
|
||||||
# Debug: check how many fields we're iterating
|
|
||||||
safe_log(paste("Starting visualization loop for", nrow(AllPivots_merged), "fields"), "DEBUG")
|
|
||||||
safe_log(paste("Fields to process:", paste(AllPivots_merged$field, collapse=", ")), "DEBUG")
|
|
||||||
|
|
||||||
# Generate plots for each field
|
|
||||||
for(i in seq_along(AllPivots_merged$field)) {
|
|
||||||
field_name <- AllPivots_merged$field[i]
|
|
||||||
safe_log(paste("Processing field", i, "of", nrow(AllPivots_merged), ":", field_name), "DEBUG")
|
|
||||||
|
|
||||||
# Skip if field_name is still NA (double check)
|
|
||||||
if(is.na(field_name)) {
|
|
||||||
safe_log(paste("Skipping field", i, "- NA name"), "DEBUG")
|
|
||||||
next
|
|
||||||
}
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
# Add page break before each field (except the first one)
|
|
||||||
if(i > 1) {
|
|
||||||
cat("\\newpage\n\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Load per-field mosaics directly for this field
|
|
||||||
field_CI <- NULL
|
|
||||||
field_CI_m1 <- NULL
|
|
||||||
field_CI_m2 <- NULL
|
|
||||||
field_CI_m3 <- NULL
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
# Load per-field mosaic for current week
|
|
||||||
per_field_path_current <- get_per_field_mosaic_path(
|
|
||||||
weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year
|
|
||||||
)
|
|
||||||
safe_log(paste("Looking for mosaic at:", per_field_path_current, "exists?", file.exists(per_field_path_current %||% "")), "DEBUG")
|
|
||||||
if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) {
|
|
||||||
field_CI <- terra::rast(per_field_path_current)[["CI"]]
|
|
||||||
safe_log(paste("Successfully loaded field_CI for", field_name), "DEBUG")
|
|
||||||
} else {
|
|
||||||
safe_log(paste("Could not load field_CI for", field_name, "- file not found"), "DEBUG")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Load per-field mosaic for week-1
|
|
||||||
per_field_path_m1 <- get_per_field_mosaic_path(
|
|
||||||
weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year
|
|
||||||
)
|
|
||||||
if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) {
|
|
||||||
field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Load per-field mosaic for week-2
|
|
||||||
per_field_path_m2 <- get_per_field_mosaic_path(
|
|
||||||
weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year
|
|
||||||
)
|
|
||||||
if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) {
|
|
||||||
field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Load per-field mosaic for week-3
|
|
||||||
per_field_path_m3 <- get_per_field_mosaic_path(
|
|
||||||
weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year
|
|
||||||
)
|
|
||||||
if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) {
|
|
||||||
field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]]
|
|
||||||
}
|
|
||||||
|
|
||||||
safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG")
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING")
|
|
||||||
})
|
|
||||||
|
|
||||||
# Calculate difference rasters from per-field data (local to this field)
|
|
||||||
last_week_dif_raster_field <- NULL
|
|
||||||
three_week_dif_raster_field <- NULL
|
|
||||||
|
|
||||||
if (!is.null(field_CI) && !is.null(field_CI_m1)) {
|
|
||||||
last_week_dif_raster_field <- field_CI - field_CI_m1
|
|
||||||
}
|
|
||||||
if (!is.null(field_CI) && !is.null(field_CI_m3)) {
|
|
||||||
three_week_dif_raster_field <- field_CI - field_CI_m3
|
|
||||||
}
|
|
||||||
|
|
||||||
# Call ci_plot with field-specific rasters
|
|
||||||
ci_plot(
|
|
||||||
pivotName = field_name,
|
|
||||||
field_boundaries = AllPivots0,
|
|
||||||
current_ci = field_CI,
|
|
||||||
ci_minus_1 = field_CI_m1,
|
|
||||||
ci_minus_2 = field_CI_m2,
|
|
||||||
last_week_diff = last_week_dif_raster_field,
|
|
||||||
three_week_diff = three_week_dif_raster_field,
|
|
||||||
harvesting_data = harvesting_data,
|
|
||||||
week = week,
|
|
||||||
week_minus_1 = week_minus_1,
|
|
||||||
week_minus_2 = week_minus_2,
|
|
||||||
week_minus_3 = week_minus_3,
|
|
||||||
borders = borders,
|
|
||||||
colorblind_friendly = colorblind_friendly
|
|
||||||
)
|
|
||||||
|
|
||||||
cat("\n\n")
|
|
||||||
|
|
||||||
# Special handling for ESA project field 00f25 - remove duplicate DOY values
|
|
||||||
if (project_dir == "esa" && field_name == "00F25") {
|
|
||||||
ci_quadrant_data <- CI_quadrant %>%
|
|
||||||
filter(field == "00F25") %>%
|
|
||||||
arrange(DOY) %>%
|
|
||||||
group_by(DOY) %>%
|
|
||||||
slice(1) %>%
|
|
||||||
ungroup()
|
|
||||||
} else {
|
|
||||||
ci_quadrant_data <- CI_quadrant
|
|
||||||
}
|
|
||||||
|
|
||||||
# Call cum_ci_plot with explicit parameters
|
|
||||||
cum_ci_plot(
|
|
||||||
pivotName = field_name,
|
|
||||||
ci_quadrant_data = ci_quadrant_data,
|
|
||||||
plot_type = ci_plot_type,
|
|
||||||
facet_on = facet_by_season,
|
|
||||||
x_unit = x_axis_unit,
|
|
||||||
colorblind_friendly = colorblind_friendly,
|
|
||||||
show_benchmarks = TRUE,
|
|
||||||
estate_name = project_dir,
|
|
||||||
benchmark_percentiles = c(10, 50, 90),
|
|
||||||
benchmark_data = benchmarks
|
|
||||||
)
|
|
||||||
|
|
||||||
cat("\n\n")
|
|
||||||
|
|
||||||
# Add field-specific KPI summary under the graphs
|
|
||||||
if (exists("field_details_table") && !is.null(field_details_table)) {
|
|
||||||
kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant)
|
|
||||||
cat(kpi_summary)
|
|
||||||
cat("\n\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Error generating plots for field", field_name, ":", e$message), "ERROR")
|
|
||||||
cat("\\newpage\n\n")
|
|
||||||
cat("# Error generating plots for field ", field_name, "\n\n")
|
|
||||||
cat(e$message, "\n\n")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("Error in field visualization section:", e$message), "ERROR")
|
|
||||||
cat("Error generating field plots. See log for details.\n\n")
|
|
||||||
})
|
|
||||||
```
|
|
||||||
|
|
||||||
\newpage
|
|
||||||
|
|
||||||
# END OF NO-TABLES DIAGNOSTIC REPORT
|
|
||||||
|
|
||||||
This diagnostic report contains only maps and graphs to help identify if the visualization system is working correctly.
|
|
||||||
|
|
||||||
*Generated for diagnostic purposes*
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
params:
|
params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: CI_report.docx
|
output_file: CI_report.docx
|
||||||
report_date: "2026-01-25"
|
report_date: "2025-09-30"
|
||||||
data_dir: "angata"
|
data_dir: "angata"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
|
|
@ -38,20 +38,25 @@ reporting_script <- TRUE
|
||||||
|
|
||||||
# Load all packages at once with suppressPackageStartupMessages
|
# Load all packages at once with suppressPackageStartupMessages
|
||||||
suppressPackageStartupMessages({
|
suppressPackageStartupMessages({
|
||||||
library(here)
|
# File path handling
|
||||||
library(sf)
|
library(here) # For relative path resolution (platform-independent file paths)
|
||||||
library(terra)
|
|
||||||
library(tidyverse)
|
# Spatial data handling
|
||||||
library(tmap)
|
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||||||
library(lubridate)
|
library(terra) # For raster operations (reading mosaic TIFFs for visualization)
|
||||||
library(zoo)
|
|
||||||
library(rsample)
|
# Data manipulation
|
||||||
library(caret)
|
library(tidyverse) # For dplyr, ggplot2, tidyr (data wrangling and visualization)
|
||||||
library(randomForest)
|
library(tidyr) # For data reshaping (pivot_longer, pivot_wider for wide-to-long conversion)
|
||||||
library(CAST)
|
library(lubridate) # For date/time operations (week extraction, date formatting)
|
||||||
library(knitr)
|
library(zoo) # For zoo objects (time series manipulation, na.locf for gap filling)
|
||||||
library(tidyr)
|
|
||||||
library(flextable)
|
# Visualization
|
||||||
|
library(tmap) # For interactive maps (field boundary visualization)
|
||||||
|
|
||||||
|
# Reporting
|
||||||
|
library(knitr) # For R Markdown document generation (code execution and output)
|
||||||
|
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Load custom utility functions
|
# Load custom utility functions
|
||||||
|
|
@ -105,6 +110,16 @@ tryCatch({
|
||||||
stop("Error loading parameters_project.R: ", e$message)
|
stop("Error loading parameters_project.R: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Source common utilities for logging and helper functions
|
||||||
|
tryCatch({
|
||||||
|
source(here::here("r_app", "00_common_utils.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("Error loading 00_common_utils.R: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Load centralized paths
|
||||||
|
paths <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
# Log initial configuration
|
# Log initial configuration
|
||||||
safe_log("Starting the R Markdown script with KPIs")
|
safe_log("Starting the R Markdown script with KPIs")
|
||||||
safe_log(paste("mail_day params:", params$mail_day))
|
safe_log(paste("mail_day params:", params$mail_day))
|
||||||
|
|
@ -112,16 +127,23 @@ safe_log(paste("report_date params:", params$report_date))
|
||||||
safe_log(paste("mail_day variable:", mail_day))
|
safe_log(paste("mail_day variable:", mail_day))
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r load_kpi_data, eval=TRUE, message=FALSE, warning=FALSE, include=FALSE}
|
```{r load_kpi_data, message=FALSE, warning=FALSE}
|
||||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||||
# Primary expected directory inside the laravel storage
|
|
||||||
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
# First, show working directory for debugging
|
||||||
|
cat("\n=== DEBUG: R Markdown Working Directory ===\n")
|
||||||
|
cat(paste("getwd():", getwd(), "\n"))
|
||||||
|
cat(paste("Expected knit_dir from R Markdown:", knitr::opts_knit$get("root.dir"), "\n\n"))
|
||||||
|
|
||||||
|
# Primary expected directory from centralized paths
|
||||||
|
kpi_data_dir <- paths$kpi_reports_dir
|
||||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||||
|
|
||||||
# Calculate current week from report_date using ISO 8601 week numbering
|
# Calculate current week from report_date using ISO 8601 week numbering
|
||||||
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
current_week <- as.numeric(format(as.Date(report_date), "%V"))
|
||||||
current_iso_year <- as.numeric(format(as.Date(report_date), "%G"))
|
current_year <- as.numeric(format(as.Date(report_date), "%G"))
|
||||||
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_iso_year)
|
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||||
|
|
||||||
# Candidate filenames we expect (exact and common variants)
|
# Candidate filenames we expect (exact and common variants)
|
||||||
expected_summary_names <- c(
|
expected_summary_names <- c(
|
||||||
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
||||||
|
|
@ -171,30 +193,69 @@ if (is.null(summary_file) || is.null(field_details_file)) {
|
||||||
|
|
||||||
# Final checks and load with safe error messages
|
# Final checks and load with safe error messages
|
||||||
kpi_files_exist <- FALSE
|
kpi_files_exist <- FALSE
|
||||||
|
|
||||||
|
# Debug: log what we're looking for
|
||||||
|
cat("\n=== KPI LOADING DEBUG ===\n")
|
||||||
|
cat(paste("Working directory:", getwd(), "\n"))
|
||||||
|
cat(paste("project_dir:", project_dir, "\n"))
|
||||||
|
cat(paste("report_date:", report_date, "\n"))
|
||||||
|
cat(paste("Calculated week:", current_week, "year:", current_year, "\n"))
|
||||||
|
cat(paste("Looking for KPI files in:", kpi_data_dir, "\n"))
|
||||||
|
cat(paste("Directory exists:", dir.exists(kpi_data_dir), "\n"))
|
||||||
|
cat(paste("Expected filenames to match:\n"))
|
||||||
|
for (name in expected_summary_names) cat(paste(" -", name, "\n"))
|
||||||
|
|
||||||
|
# List what's actually in the directory
|
||||||
|
if (dir.exists(kpi_data_dir)) {
|
||||||
|
actual_files <- list.files(kpi_data_dir, pattern = ".*\\.rds$", full.names = FALSE)
|
||||||
|
cat(paste("Files in KPI directory (", length(actual_files), " total):\n"))
|
||||||
|
for (f in actual_files) cat(paste(" -", f, "\n"))
|
||||||
|
} else {
|
||||||
|
cat("KPI directory does NOT exist!\n")
|
||||||
|
}
|
||||||
|
|
||||||
if (!is.null(summary_file) && file.exists(summary_file)) {
|
if (!is.null(summary_file) && file.exists(summary_file)) {
|
||||||
safe_log(paste("Loading KPI summary from:", summary_file))
|
cat(paste("✓ FOUND summary file:", summary_file, "\n"))
|
||||||
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
|
cat(paste(" File size:", file.size(summary_file), "bytes\n"))
|
||||||
|
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
|
||||||
|
|
||||||
|
if (!is.null(summary_data)) {
|
||||||
|
cat(paste(" ✓ Loaded successfully. Class:", class(summary_data), "\n"))
|
||||||
|
if (is.list(summary_data)) {
|
||||||
|
cat(paste(" List names:", paste(names(summary_data), collapse = ", "), "\n"))
|
||||||
|
}
|
||||||
|
|
||||||
# Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format
|
# Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format
|
||||||
if (!is.null(summary_data)) {
|
|
||||||
if (is.list(summary_data) && !is.data.frame(summary_data)) {
|
if (is.list(summary_data) && !is.data.frame(summary_data)) {
|
||||||
# New format from 09_field_analysis_weekly.R - just pass it through
|
# New format from 09_field_analysis_weekly.R - just pass it through
|
||||||
if ("field_analysis_summary" %in% names(summary_data)) {
|
if ("field_analysis_summary" %in% names(summary_data)) {
|
||||||
|
cat(" ✓ Found field_analysis_summary in list - will use this structure\n")
|
||||||
# Keep the new structure intact - combined_kpi_table will use it directly
|
# Keep the new structure intact - combined_kpi_table will use it directly
|
||||||
kpi_files_exist <- TRUE
|
kpi_files_exist <- TRUE
|
||||||
} else {
|
} else {
|
||||||
|
cat(" ! Old format detected\n")
|
||||||
# Old format - keep as is
|
# Old format - keep as is
|
||||||
summary_tables <- summary_data
|
summary_tables <- summary_data
|
||||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
cat(" ! Data frame format\n")
|
||||||
# Data frame format or direct tables
|
# Data frame format or direct tables
|
||||||
summary_tables <- summary_data
|
summary_tables <- summary_data
|
||||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
cat(" ✗ Failed to load RDS - summary_data is NULL\n")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
||||||
|
safe_log(paste("Attempted directory:", kpi_data_dir), "WARNING")
|
||||||
|
# Try searching the entire workspace as fallback
|
||||||
|
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "kpi.*\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||||
|
safe_log(paste("Found", length(files), "KPI RDS files in workspace"), "INFO")
|
||||||
|
if (length(files) > 0) {
|
||||||
|
safe_log(paste("Available files:", paste(basename(files), collapse = ", ")), "INFO")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is.null(field_details_file) && file.exists(field_details_file)) {
|
if (!is.null(field_details_file) && file.exists(field_details_file)) {
|
||||||
|
|
@ -427,7 +488,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
|
||||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE, eval=TRUE}
|
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE, eval=TRUE}
|
||||||
# Load CI index data with error handling
|
# Load CI index data with error handling
|
||||||
tryCatch({
|
tryCatch({
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||||
|
|
||||||
safe_log("Successfully loaded CI quadrant data")
|
safe_log("Successfully loaded CI quadrant data")
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
|
|
@ -683,20 +744,77 @@ tryCatch({
|
||||||
\newpage
|
\newpage
|
||||||
## 1.2 Key Performance Indicators
|
## 1.2 Key Performance Indicators
|
||||||
|
|
||||||
```{r combined_kpi_table, echo=FALSE, eval=TRUE}
|
```{r combined_kpi_table, echo=TRUE}
|
||||||
|
# Debug: check what variables exist
|
||||||
|
cat("\n=== DEBUG: combined_kpi_table chunk ===\n")
|
||||||
|
cat(paste("exists('summary_data'):", exists("summary_data"), "\n"))
|
||||||
|
cat(paste("exists('kpi_files_exist'):", exists("kpi_files_exist"), "\n"))
|
||||||
|
if (exists("kpi_files_exist")) {
|
||||||
|
cat(paste("kpi_files_exist value:", kpi_files_exist, "\n"))
|
||||||
|
}
|
||||||
|
if (exists("summary_data")) {
|
||||||
|
cat(paste("summary_data class:", class(summary_data), "\n"))
|
||||||
|
if (is.list(summary_data)) {
|
||||||
|
cat(paste("summary_data names:", paste(names(summary_data), collapse = ", "), "\n"))
|
||||||
|
cat(paste("has field_analysis_summary:", "field_analysis_summary" %in% names(summary_data), "\n"))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cat("summary_data DOES NOT EXIST in this chunk's environment!\n")
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
# Create summary KPI table from field_analysis_summary data
|
# Create summary KPI table from field_analysis_summary data
|
||||||
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
|
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
|
||||||
|
|
||||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary" %in% names(summary_data)) {
|
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
field_analysis_summary <- summary_data$field_analysis_summary
|
# Load field analysis data
|
||||||
field_analysis_df <- summary_data$field_analysis
|
field_analysis_df <- summary_data$field_analysis
|
||||||
|
|
||||||
|
# If field_analysis_summary is NULL or doesn't exist, create it from field_analysis_df
|
||||||
|
if (is.null(summary_data$field_analysis_summary) || !("field_analysis_summary" %in% names(summary_data)) ||
|
||||||
|
!is.data.frame(summary_data$field_analysis_summary)) {
|
||||||
|
cat("\nNote: field_analysis_summary not in RDS, creating from field_analysis...\n")
|
||||||
|
|
||||||
|
# Create summary by aggregating by Status_Alert and Phase categories
|
||||||
|
# This groups fields by their phase and status to show distribution
|
||||||
|
phase_summary <- field_analysis_df %>%
|
||||||
|
filter(!is.na(Phase)) %>%
|
||||||
|
group_by(Phase) %>%
|
||||||
|
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
||||||
|
mutate(Category = Phase) %>%
|
||||||
|
select(Category, Acreage)
|
||||||
|
|
||||||
|
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
|
||||||
|
trigger_summary <- tryCatch({
|
||||||
|
field_analysis_df %>%
|
||||||
|
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
||||||
|
group_by(Status_Alert) %>%
|
||||||
|
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
||||||
|
mutate(Category = Status_Alert) %>%
|
||||||
|
select(Category, Acreage)
|
||||||
|
}, error = function(e) {
|
||||||
|
cat("Could not create trigger summary:", e$message, "\n")
|
||||||
|
data.frame(Category = character(), Acreage = numeric())
|
||||||
|
})
|
||||||
|
|
||||||
|
# Combine into summary
|
||||||
|
field_analysis_summary <- bind_rows(phase_summary, trigger_summary)
|
||||||
|
|
||||||
|
cat(paste("Created summary with", nrow(field_analysis_summary), "category rows\n"))
|
||||||
|
|
||||||
|
} else {
|
||||||
|
# Use existing summary from RDS
|
||||||
|
field_analysis_summary <- summary_data$field_analysis_summary
|
||||||
|
}
|
||||||
|
|
||||||
# Phase names and trigger names to extract from summary
|
# Phase names and trigger names to extract from summary
|
||||||
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
||||||
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
|
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
|
||||||
"Germination Complete", "Germination Started", "No Active Trigger")
|
"Germination Complete", "Germination Started", "No Active Trigger",
|
||||||
|
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
|
||||||
|
|
||||||
# Extract phase distribution - match on category names directly
|
# Extract phase distribution - match on category names directly
|
||||||
|
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
|
||||||
phase_rows <- field_analysis_summary %>%
|
phase_rows <- field_analysis_summary %>%
|
||||||
filter(Category %in% phase_names) %>%
|
filter(Category %in% phase_names) %>%
|
||||||
select(Category, Acreage) %>%
|
select(Category, Acreage) %>%
|
||||||
|
|
@ -802,6 +920,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary"
|
||||||
}
|
}
|
||||||
|
|
||||||
ft
|
ft
|
||||||
|
} else {
|
||||||
|
cat("KPI summary data available but is empty/invalid.\n")
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
cat("KPI summary data not available.\n")
|
cat("KPI summary data not available.\n")
|
||||||
}
|
}
|
||||||
|
|
@ -854,11 +975,11 @@ if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
|
||||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
field_analysis_table <- summary_data$field_analysis
|
field_analysis_table <- summary_data$field_analysis
|
||||||
|
|
||||||
# Extract fields with status triggers (non-null)
|
# Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
|
||||||
alerts_data <- field_analysis_table %>%
|
alerts_data <- field_analysis_table %>%
|
||||||
filter(!is.na(Status_trigger), Status_trigger != "") %>%
|
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
||||||
select(Field_id, Status_trigger) %>%
|
select(Field_id, Status_Alert) %>%
|
||||||
rename(Field = Field_id, Alert = Status_trigger)
|
rename(Field = Field_id, Alert = Status_Alert)
|
||||||
|
|
||||||
if (nrow(alerts_data) > 0) {
|
if (nrow(alerts_data) > 0) {
|
||||||
# Format alert messages for display
|
# Format alert messages for display
|
||||||
|
|
@ -892,22 +1013,50 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
# The report renders KPI tables and field summaries from that data
|
# The report renders KPI tables and field summaries from that data
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE, eval=TRUE}
|
```{r load_field_boundaries, message=FALSE, warning=FALSE, include=FALSE}
|
||||||
# Load field boundaries from parameters
|
# Load field boundaries from parameters (with fallback if geometry is invalid)
|
||||||
field_boundaries_sf <- sf::st_make_valid(field_boundaries_sf)
|
field_boundaries_loaded <- FALSE
|
||||||
tryCatch({
|
|
||||||
AllPivots0 <- field_boundaries_sf %>%
|
|
||||||
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
|
|
||||||
safe_log("Successfully loaded field boundaries")
|
|
||||||
|
|
||||||
# Prepare merged field list for use in summaries
|
tryCatch({
|
||||||
|
# Try to load and validate the field boundaries
|
||||||
|
if (exists("field_boundaries_sf") && !is.null(field_boundaries_sf)) {
|
||||||
|
# Try to filter - this will trigger geometry validation
|
||||||
|
AllPivots0 <- field_boundaries_sf %>%
|
||||||
|
dplyr::filter(!is.na(field), !is.na(sub_field))
|
||||||
|
|
||||||
|
# If successful, also create merged field list
|
||||||
AllPivots_merged <- AllPivots0 %>%
|
AllPivots_merged <- AllPivots0 %>%
|
||||||
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
|
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
|
||||||
dplyr::group_by(field) %>%
|
dplyr::group_by(field) %>%
|
||||||
dplyr::summarise(.groups = 'drop')
|
dplyr::summarise(.groups = 'drop')
|
||||||
|
|
||||||
|
field_boundaries_loaded <- TRUE
|
||||||
|
safe_log("✓ Successfully loaded field boundaries")
|
||||||
|
} else {
|
||||||
|
safe_log("⚠ field_boundaries_sf not found in environment")
|
||||||
|
}
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading field boundaries: ", e$message)
|
# If geometry is invalid, try to fix or skip
|
||||||
|
safe_log(paste("⚠ Error loading field boundaries:", e$message), "WARNING")
|
||||||
|
safe_log("Attempting to fix invalid geometries using st_make_valid()...", "WARNING")
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Try to repair invalid geometries
|
||||||
|
field_boundaries_sf_fixed <<- sf::st_make_valid(field_boundaries_sf)
|
||||||
|
AllPivots0 <<- field_boundaries_sf_fixed %>%
|
||||||
|
dplyr::filter(!is.na(field), !is.na(sub_field))
|
||||||
|
|
||||||
|
AllPivots_merged <<- AllPivots0 %>%
|
||||||
|
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
|
||||||
|
dplyr::group_by(field) %>%
|
||||||
|
dplyr::summarise(.groups = 'drop')
|
||||||
|
|
||||||
|
field_boundaries_loaded <<- TRUE
|
||||||
|
safe_log("✓ Fixed invalid geometries and loaded field boundaries")
|
||||||
|
}, error = function(e2) {
|
||||||
|
safe_log(paste("⚠ Could not repair geometries:", e2$message), "WARNING")
|
||||||
|
safe_log("Continuing without field boundary data", "WARNING")
|
||||||
|
})
|
||||||
})
|
})
|
||||||
```
|
```
|
||||||
\newpage
|
\newpage
|
||||||
|
|
|
||||||
631
r_app/MANUAL_PIPELINE_RUNNER.R
Normal file
631
r_app/MANUAL_PIPELINE_RUNNER.R
Normal file
|
|
@ -0,0 +1,631 @@
|
||||||
|
# ==============================================================================
|
||||||
|
# SMARTCANE MANUAL PIPELINE RUNNER
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS.
|
||||||
|
# Do NOT run this script directly - instead, copy individual commands and
|
||||||
|
# paste them into your PowerShell terminal.
|
||||||
|
#
|
||||||
|
# This approach allows you to:
|
||||||
|
# - Run steps one at a time and inspect outputs
|
||||||
|
# - Re-run failed steps without re-running successful ones
|
||||||
|
# - Monitor progress between steps
|
||||||
|
# - Troubleshoot issues more easily than with automated pipeline
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
# PIPELINE SEQUENCE (IN ORDER)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# 1. Python: Download Planet satellite imagery (optional - only if new data needed)
|
||||||
|
# 2. R10: Split farm TIFFs into per-field directory structure
|
||||||
|
# 3. R20: Extract Canopy Index (CI) from 4-band imagery
|
||||||
|
# 4. R30: Interpolate growth model (smooth CI time series)
|
||||||
|
# 5. R21: Convert CI data to CSV format for Python
|
||||||
|
# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx)
|
||||||
|
# 7. R40: Create weekly mosaic TIFFs
|
||||||
|
# 8. R80: Calculate KPIs (field uniformity, trends, stress)
|
||||||
|
# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply)
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
# BEFORE YOU START
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# 1. Open PowerShell in the smartcane root directory:
|
||||||
|
# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\
|
||||||
|
#
|
||||||
|
# 2. Define your parameters ONCE at the top of the session:
|
||||||
|
#
|
||||||
|
# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba
|
||||||
|
# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04)
|
||||||
|
# $OFFSET = 7 # Days to look back (e.g., 7 for one week)
|
||||||
|
# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE
|
||||||
|
# $YEAR = 2026 # ISO year - auto-calculated from END_DATE
|
||||||
|
#
|
||||||
|
# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc.
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
# COMMAND REFERENCE
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API
|
||||||
|
# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif
|
||||||
|
#
|
||||||
|
# WHEN TO RUN:
|
||||||
|
# - Only needed if you have new dates to process
|
||||||
|
# - Pipeline skips dates already in merged_tif/ or field_tiles/
|
||||||
|
# - First-time setup: download for your date range
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||||||
|
# RESOLUTION: 3 meters (default) - can also use 5, 10
|
||||||
|
# --cleanup: Delete intermediate files after download
|
||||||
|
# --clear-all: Clear all output folders before downloading
|
||||||
|
#
|
||||||
|
# COMMAND #1 - Single Date Download:
|
||||||
|
#
|
||||||
|
# cd python_app
|
||||||
|
# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
|
||||||
|
#
|
||||||
|
# COMMAND #2 - Batch Download (Multiple Dates):
|
||||||
|
# For date ranges, MUST use download_planet_missing_dates.py (not Script 00)
|
||||||
|
#
|
||||||
|
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
|
||||||
|
#
|
||||||
|
# IMPORTANT DISTINCTION:
|
||||||
|
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
|
||||||
|
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
|
||||||
|
# Script 00 does NOT have --start/--end flags despite documentation suggestion
|
||||||
|
# Use the correct script for your use case!
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
|
||||||
|
#
|
||||||
|
# Note: Planet API requires authentication (PLANET_API_KEY environment variable)
|
||||||
|
# Cost: ~1,500-2,000 PU per date
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 1: R10 - Create Per-Field TIFF Structure
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Split farm-wide GeoTIFFs into per-field directory structure.
|
||||||
|
# Transforms: merged_tif/{DATE}.tif (single file)
|
||||||
|
# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files)
|
||||||
|
# This enables clean, scalable processing in downstream scripts.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR)
|
||||||
|
# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif
|
||||||
|
# - One TIFF per field per date (1185 fields × N dates in Angata)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
|
||||||
|
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
|
||||||
|
#
|
||||||
|
# COMMAND #1 - Default (All dates, current date, 7-day window):
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||||
|
#
|
||||||
|
# COMMAND #2 - Specific Date Range:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
|
||||||
|
#
|
||||||
|
# Example (one week back from 2026-02-09):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||||||
|
#
|
||||||
|
# Example (two weeks back from 2026-02-09):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
|
||||||
|
# Storage location: laravel_app/storage/app/angata/field_tiles/
|
||||||
|
# Script execution time: 5-10 minutes (depends on number of dates)
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 2: R20 - Extract Chlorophyll Index (CI)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs.
|
||||||
|
# Also extracts CI statistics per sub_field for daily tracking.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI)
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds
|
||||||
|
#
|
||||||
|
# EXPECTED BEHAVIOR:
|
||||||
|
# If field_tiles_CI/ or daily_vals/ missing files, Script 20 will process them
|
||||||
|
# Script 20 skips files that already exist (to avoid re-processing)
|
||||||
|
# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end
|
||||||
|
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
|
||||||
|
# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/
|
||||||
|
# Storage location: laravel_app/storage/app/angata/field_tiles_CI/
|
||||||
|
# Script execution time: 10-20 minutes (depends on number of dates+fields)
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE
|
||||||
|
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
|
||||||
|
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 3: R30 - Interpolate Growth Model
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Smooth CI time series using LOESS interpolation to fill gaps.
|
||||||
|
# Creates continuous growth curves for each field across all measurement dates.
|
||||||
|
# Enables trend analysis, yield prediction, and cumulative growth metrics.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - Daily CI statistics from Script 20 (field_tiles_CI/ per-field RDS files)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# - (This is the growth model output used by Script 21 and 80)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# Contains: Interpolated CI data for all fields (wide format)
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 4: R21 - Convert CI RDS to CSV (Python Format)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Convert growth model output from R's RDS format to Python-compatible CSV.
|
||||||
|
# Transforms from wide format (fields × dates) to long format (one row per field-date pair).
|
||||||
|
# Prepares data for Python harvest detection models.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# (Output from Script 30)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
|
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: ci_data_for_python.csv (~5-10 MB)
|
||||||
|
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Predict which fields are approaching harvest in the next 28 days.
|
||||||
|
# Uses neural network (Model 307) trained on historical harvest dates.
|
||||||
|
# Generates weekly probability scores for operational harvest scheduling.
|
||||||
|
#
|
||||||
|
# REQUIRES:
|
||||||
|
# - harvest.xlsx with field planting/harvest dates
|
||||||
|
# - ci_data_for_python.csv from Script 21
|
||||||
|
# - PyTorch environment (conda pytorch_gpu)
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
|
# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
|
# Rows: One per field (e.g., 1185 rows for Angata)
|
||||||
|
#
|
||||||
|
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 6: R40 - Create Weekly Mosaic TIFFs
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Aggregate daily per-field CI TIFFs into weekly mosaics.
|
||||||
|
# Handles multiple dates (full week) with maximum CI value per pixel.
|
||||||
|
# Creates 5-band output for reporting and KPI calculations.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif
|
||||||
|
# (Daily per-field CI TIFFs from Script 20)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif
|
||||||
|
# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week
|
||||||
|
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT]
|
||||||
|
#
|
||||||
|
# Example (one week window):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# Location: laravel_app/storage/app/angata/weekly_mosaic/
|
||||||
|
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
|
||||||
|
# Files created: #fields (e.g., 1185 for Angata)
|
||||||
|
# Storage: ~50-100 MB total for all mosaic TIFFs
|
||||||
|
# Script execution time: 5-10 minutes
|
||||||
|
#
|
||||||
|
# NOTE: Files are named with ISO week number (WW) and year (YYYY)
|
||||||
|
# Week calculation is automatic based on END_DATE
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 7: R80 - Calculate Key Performance Indicators (KPIs)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Calculate per-field metrics from weekly mosaic TIFFs:
|
||||||
|
# - Field uniformity (CV - Coefficient of Variation)
|
||||||
|
# - Growth trends (4-week and 8-week)
|
||||||
|
# - Area change detection
|
||||||
|
# - TCH forecast
|
||||||
|
# - Spatial clustering (weed/stress detection)
|
||||||
|
# - Generates Excel export for dashboards and reporting
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif
|
||||||
|
# - Field boundaries (pivot.geojson)
|
||||||
|
# - Harvest data (harvest.xlsx)
|
||||||
|
# - Historical stats cache (RDS from previous weeks)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats)
|
||||||
|
# - 21 columns with field-level KPIs and alerts
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# END_DATE: Report date in YYYY-MM-DD format (default: today)
|
||||||
|
# PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
|
||||||
|
# OFFSET: Days to look back for historical comparison (default: 7, for backward compatibility)
|
||||||
|
#
|
||||||
|
# COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||||||
|
#
|
||||||
|
# COMMAND #2 - Specific Date & Project:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
|
||||||
|
#
|
||||||
|
# Example (2026-02-09, angata, 7-day lookback):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# Rows: One per field (e.g., 1185 for Angata)
|
||||||
|
# Columns: 21 KPI columns (uniformity, trend, alerts, etc.)
|
||||||
|
# Location: laravel_app/storage/app/angata/output/
|
||||||
|
# Script execution time: 10-20 minutes
|
||||||
|
#
|
||||||
|
# EXPECTED COLUMNS:
|
||||||
|
# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac,
|
||||||
|
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
|
||||||
|
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
|
||||||
|
#
|
||||||
|
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
|
||||||
|
# R40 order: [END_DATE] [OFFSET] [PROJECT]
|
||||||
|
# R80 order: [END_DATE] [PROJECT] [OFFSET]
|
||||||
|
# These are NOT the same! Ensure correct order for each script.
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 8: R90/R91 - Generate Word Report (OPTIONAL)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Generate formatted Word report (.docx) with:
|
||||||
|
# - KPI summary tables and charts
|
||||||
|
# - Per-field performance metrics
|
||||||
|
# - Alerts and recommendations
|
||||||
|
# - Interpretation guides
|
||||||
|
#
|
||||||
|
# Client-Specific Reports:
|
||||||
|
# - R90: Agronomic Support (for AURA project)
|
||||||
|
# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA)
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# (from Script 80)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_*.docx
|
||||||
|
# - Formatted Word document (~5-10 MB)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||||||
|
# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render)
|
||||||
|
#
|
||||||
|
# COMMAND #1 - AGRONOMIC REPORT (AURA project):
|
||||||
|
# From R console or R script:
|
||||||
|
#
|
||||||
|
# rmarkdown::render(
|
||||||
|
# "r_app/90_CI_report_with_kpis_simple.Rmd",
|
||||||
|
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||||
|
# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
|
||||||
|
# output_dir = "laravel_app/storage/app/angata/reports"
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
|
||||||
|
# From R console or R script:
|
||||||
|
#
|
||||||
|
# rmarkdown::render(
|
||||||
|
# "r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||||
|
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||||
|
# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||||
|
# output_dir = "laravel_app/storage/app/angata/reports"
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
|
||||||
|
# Location: laravel_app/storage/app/{PROJECT}/reports/
|
||||||
|
# Script execution time: 5-10 minutes
|
||||||
|
#
|
||||||
|
# NOTE:
|
||||||
|
# These are R Markdown files and cannot be run directly via Rscript
|
||||||
|
# Use rmarkdown::render() from an R interactive session or wrapper script
|
||||||
|
# See run_full_pipeline.R for an automated example
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# QUICK REFERENCE: Common Workflows
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# WORKFLOW A: Weekly Update (Most Common)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# Goal: Process latest week of data through full pipeline
|
||||||
|
#
|
||||||
|
# Parameters:
|
||||||
|
# $PROJECT = "angata"
|
||||||
|
# $END_DATE = "2026-02-04" # Today or latest date available
|
||||||
|
# $OFFSET = 7 # One week back
|
||||||
|
#
|
||||||
|
# Steps:
|
||||||
|
# 1. SKIP Python download (if you already have data)
|
||||||
|
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7
|
||||||
|
# (Argument order: [PROJECT] [END_DATE] [OFFSET])
|
||||||
|
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
||||||
|
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||||
|
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||||
|
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||||
|
# (Argument order: [END_DATE] [OFFSET] [PROJECT])
|
||||||
|
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7
|
||||||
|
# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!)
|
||||||
|
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
|
||||||
|
# OR from R console:
|
||||||
|
# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||||
|
# params=list(data_dir="angata", report_date=as.Date("2026-02-04")),
|
||||||
|
# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||||
|
# output_dir="laravel_app/storage/app/angata/reports")
|
||||||
|
#
|
||||||
|
# Execution time: ~60-90 minutes total
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# WORKFLOW B: Initial Setup (Large Backfill)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# Goal: Process multiple weeks of historical data
|
||||||
|
#
|
||||||
|
# Steps:
|
||||||
|
# 1. Python download (your entire date range)
|
||||||
|
# 2. Run R10 with large offset to process all historical dates:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365
|
||||||
|
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||||
|
# 3. Run R20 with large offset to process all historical dates:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
|
||||||
|
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||||
|
# 4. Run R30 once (growth model full season)
|
||||||
|
# 5. Run R21 once (CSV export)
|
||||||
|
# 6. Run R40 with specific week windows as needed
|
||||||
|
# 7. Run R80 for each week you want KPIs for
|
||||||
|
|
||||||
|
# 6. For each week, run:
|
||||||
|
# - R40 with different END_DATE values (one per week)
|
||||||
|
# - R80 with different WEEK/YEAR values (one per week)
|
||||||
|
# - R91 optional (one per week report)
|
||||||
|
#
|
||||||
|
# Pro tip: Script R40 with offset=14 covers two weeks at once
|
||||||
|
# Then R40 again with offset=7 for just one week
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# WORKFLOW C: Troubleshooting (Check Intermediate Outputs)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# Goal: Verify outputs before moving to next step
|
||||||
|
#
|
||||||
|
# After R10: Check field_tiles/{FIELD_ID}/ has #dates files
|
||||||
|
# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files
|
||||||
|
# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds
|
||||||
|
# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week
|
||||||
|
# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# TROUBLESHOOTING
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# ISSUE: R20 not processing all field_tiles files
|
||||||
|
# ────────────────────────────────────────────────
|
||||||
|
# Symptom: field_tiles has 496 files, field_tiles_CI only has 5
|
||||||
|
#
|
||||||
|
# Possible causes:
|
||||||
|
# 1. Source files incomplete or corrupted
|
||||||
|
# 2. Script 20 skips because CI TIFF already exists (even if incomplete)
|
||||||
|
# 3. Partial run from previous attempt
|
||||||
|
#
|
||||||
|
# Solutions:
|
||||||
|
# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!)
|
||||||
|
# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/*
|
||||||
|
# 2. Re-run Script 20
|
||||||
|
# 3. If still fails, delete field_tiles_CI completely and re-run Script 20
|
||||||
|
# rm -r laravel_app/storage/app/angata/field_tiles_CI/
|
||||||
|
#
|
||||||
|
# ISSUE: Script 80 says "No per-field mosaic files found"
|
||||||
|
# ────────────────────────────────────────────────────────
|
||||||
|
# Symptom: R80 fails to calculate KPIs
|
||||||
|
#
|
||||||
|
# Possible causes:
|
||||||
|
# 1. Script 40 hasn't run yet (weekly_mosaic doesn't exist)
|
||||||
|
# 2. Wrong END_DATE or WEEK/YEAR combination
|
||||||
|
# 3. weekly_mosaic/{FIELD}/ directory missing (old format?)
|
||||||
|
#
|
||||||
|
# Solutions:
|
||||||
|
# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif
|
||||||
|
# 2. Verify END_DATE is within date range of available CI data
|
||||||
|
# 3. For current week: End date must be THIS week (same ISO week as today)
|
||||||
|
#
|
||||||
|
# ISSUE: Python download fails ("Not authorized")
|
||||||
|
# ────────────────────────────────────────────────
|
||||||
|
# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error
|
||||||
|
#
|
||||||
|
# Cause: PLANET_API_KEY environment variable not set
|
||||||
|
#
|
||||||
|
# Solution:
|
||||||
|
# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here"
|
||||||
|
# 2. Verify: $env:PLANET_API_KEY (should show your key)
|
||||||
|
# 3. Try download again
|
||||||
|
#
|
||||||
|
# ISSUE: R30 takes too long
|
||||||
|
# ──────────────────────────
|
||||||
|
# Symptom: Script 30 running for >30 minutes
|
||||||
|
#
|
||||||
|
# Cause: LOESS interpolation is slow with many dates/fields
|
||||||
|
#
|
||||||
|
# Solution:
|
||||||
|
# 1. This is normal - large date ranges slow down interpolation
|
||||||
|
# 2. Subsequent runs are faster (cached results)
|
||||||
|
# 3. If needed: reduce offset or run fewer weeks at a time
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# SUMMARY OF FILES CREATED BY EACH SCRIPT
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# Script 10 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif
|
||||||
|
#
|
||||||
|
# Script 20 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif
|
||||||
|
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds
|
||||||
|
#
|
||||||
|
# Script 30 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
#
|
||||||
|
# Script 21 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
|
#
|
||||||
|
# Python 31 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
|
#
|
||||||
|
# Script 40 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/{DATE}/week_{WW}_{YYYY}.tif
|
||||||
|
#
|
||||||
|
# Script 80 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds
|
||||||
|
#
|
||||||
|
# Script 90/91 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
54
r_app/_SCRIPT_HEADER_TEMPLATE.R
Normal file
54
r_app/_SCRIPT_HEADER_TEMPLATE.R
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
# ============================================================================
|
||||||
|
# SCRIPT XX: [DESCRIPTIVE TITLE]
|
||||||
|
# ============================================================================
|
||||||
|
# PURPOSE:
|
||||||
|
# [What this script does in 2-3 sentences. Example: Downloads satellite
|
||||||
|
# imagery from Planet API, processes 4-band RGB+NIR data, and saves
|
||||||
|
# merged GeoTIFFs for use by downstream analysis stages.]
|
||||||
|
#
|
||||||
|
# INPUT DATA:
|
||||||
|
# - Source: [Which directory/files it reads. Example: Planet API, hardcoded bbox]
|
||||||
|
# - Format: [TIFF, RDS, GeoJSON, etc. Example: 4-band uint16 GeoTIFF]
|
||||||
|
# - Location: [Full path example]
|
||||||
|
#
|
||||||
|
# OUTPUT DATA:
|
||||||
|
# - Destination: [Which directory/files it creates. Example: laravel_app/storage/app/{project}/merged_tif/]
|
||||||
|
# - Format: [TIFF, RDS, CSV, etc. Example: Single-band float32 GeoTIFF]
|
||||||
|
# - Naming convention: [Example: {YYYY-MM-DD}.tif]
|
||||||
|
#
|
||||||
|
# USAGE:
|
||||||
|
# Rscript XX_script_name.R [arg1] [arg2] [arg3]
|
||||||
|
#
|
||||||
|
# Example (Windows PowerShell):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/XX_script_name.R angata 2026-01-15
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# - arg1: [Description, type, and valid values. Example: project (character) - angata, chemba, xinavane, esa, simba]
|
||||||
|
# - arg2: [Description, type, and valid values. Example: date (character, optional) - ISO format YYYY-MM-DD; default today]
|
||||||
|
#
|
||||||
|
# CLIENT TYPES:
|
||||||
|
# - cane_supply (ANGATA): [Yes/No - if Yes, briefly explain any differences]
|
||||||
|
# - agronomic_support (AURA): [Yes/No - if Yes, briefly explain any differences]
|
||||||
|
#
|
||||||
|
# DEPENDENCIES:
|
||||||
|
# - Packages: [dplyr, tidyr, terra, sf, readr, writexl, etc.]
|
||||||
|
# - Utils files: [parameters_project.R, 00_common_utils.R, XX_utils.R, etc.]
|
||||||
|
# - External data: [harvest.xlsx, pivot.geojson, etc.]
|
||||||
|
# - Data directories: [laravel_app/storage/app/{project}/]
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# [Any special considerations, assumptions, or future improvements.
|
||||||
|
# Example: Cloud filtering uses CI >= 0.5 threshold. Multi-field support
|
||||||
|
# implemented via field geometry masking from pivot.geojson.]
|
||||||
|
#
|
||||||
|
# RELATED ISSUES:
|
||||||
|
# SC-XXX: [Brief description of related work]
|
||||||
|
#
|
||||||
|
# HISTORY:
|
||||||
|
# 2026-02-03: [Description of change, if refactored or enhanced]
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# NOTE: This is a TEMPLATE file for documentation purposes only.
|
||||||
|
# When creating a new script or updating an existing one, copy this template
|
||||||
|
# and fill in all sections with accurate information about your specific script.
|
||||||
|
# Then delete this comment block.
|
||||||
|
|
@ -156,9 +156,10 @@ calculate_ci <- function(raster_obj) {
|
||||||
red_band <- raster_obj[[3]]
|
red_band <- raster_obj[[3]]
|
||||||
nir_band <- raster_obj[[4]]
|
nir_band <- raster_obj[[4]]
|
||||||
|
|
||||||
# CI formula: (NIR / Red) - 1
|
# CI formula: (NIR / Green) - 1, NOT (NIR / Red) - 1
|
||||||
# This highlights chlorophyll content in vegetation
|
# *** CRITICAL: Use GREEN band for Chlorophyll Index, NOT RED ***
|
||||||
ci_raster <- (nir_band / red_band) - 1
|
# GREEN band is essential for proper chlorophyll-sensitive calculation
|
||||||
|
ci_raster <- (nir_band / green_band) - 1
|
||||||
|
|
||||||
# Filter extreme values that may result from division operations
|
# Filter extreme values that may result from division operations
|
||||||
ci_raster[ci_raster > 10] <- 10 # Cap max value
|
ci_raster[ci_raster > 10] <- 10 # Cap max value
|
||||||
|
|
|
||||||
|
|
@ -68,8 +68,9 @@ calculate_enhanced_indices <- function(red, green, blue, nir) {
|
||||||
grvi <- green / red
|
grvi <- green / red
|
||||||
names(grvi) <- "GRVI"
|
names(grvi) <- "GRVI"
|
||||||
|
|
||||||
# 6. Chlorophyll Index (current CI - for comparison)
|
# 6. Chlorophyll Index (CI = NIR / Green - 1, NOT NIR/Red)
|
||||||
ci <- nir / red - 1
|
# *** CRITICAL: Correct formula uses GREEN band, not RED ***
|
||||||
|
ci <- nir / green - 1
|
||||||
names(ci) <- "CI"
|
names(ci) <- "CI"
|
||||||
|
|
||||||
return(list(
|
return(list(
|
||||||
|
|
|
||||||
|
|
@ -69,26 +69,55 @@ calculate_week_numbers <- function(report_date = Sys.Date()) {
|
||||||
#' @param year Year
|
#' @param year Year
|
||||||
#' @param mosaic_dir Directory containing weekly mosaics
|
#' @param mosaic_dir Directory containing weekly mosaics
|
||||||
#' @return Terra raster with CI band, or NULL if file not found
|
#' @return Terra raster with CI band, or NULL if file not found
|
||||||
|
# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures)
|
||||||
|
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
|
||||||
|
# Check if this is per-field loading mode
|
||||||
|
is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field")
|
||||||
|
|
||||||
|
if (is_per_field) {
|
||||||
|
# Per-field architecture: load this specific field's mosaic
|
||||||
|
per_field_dir <- attr(ci_raster_or_obj, "per_field_dir")
|
||||||
|
week_file <- attr(ci_raster_or_obj, "week_file")
|
||||||
|
field_mosaic_path <- file.path(per_field_dir, field_name, week_file)
|
||||||
|
|
||||||
|
if (file.exists(field_mosaic_path)) {
|
||||||
|
tryCatch({
|
||||||
|
field_mosaic <- terra::rast(field_mosaic_path)
|
||||||
|
# Extract CI band (5th band) if multi-band, otherwise use as-is
|
||||||
|
if (terra::nlyr(field_mosaic) >= 5) {
|
||||||
|
return(field_mosaic[[5]])
|
||||||
|
} else {
|
||||||
|
return(field_mosaic[[1]])
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
safe_log(paste("Per-field mosaic not found for", field_name), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
# Single-file architecture: crop from loaded raster
|
||||||
|
if (!is.null(field_vect)) {
|
||||||
|
return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
|
||||||
|
} else {
|
||||||
|
return(ci_raster_or_obj)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
|
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
|
||||||
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
|
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
|
||||||
week_path <- file.path(mosaic_dir, week_file)
|
week_path <- file.path(mosaic_dir, week_file)
|
||||||
|
|
||||||
if (!file.exists(week_path)) {
|
# FIRST: Try to load single-file mosaic (legacy approach)
|
||||||
safe_log(paste("Weekly mosaic not found:", week_path), "WARNING")
|
if (file.exists(week_path)) {
|
||||||
return(NULL)
|
|
||||||
}
|
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
mosaic_raster <- terra::rast(week_path)
|
mosaic_raster <- terra::rast(week_path)
|
||||||
# Extract CI band by name or position
|
ci_raster <- mosaic_raster[[5]] # CI is the 5th band
|
||||||
if ("CI" %in% names(mosaic_raster)) {
|
|
||||||
ci_raster <- mosaic_raster[["CI"]]
|
|
||||||
} else {
|
|
||||||
# Fallback: assume last band is CI (after Red, Green, Blue, NIR)
|
|
||||||
ci_raster <- mosaic_raster[[nlyr(mosaic_raster)]]
|
|
||||||
}
|
|
||||||
names(ci_raster) <- "CI"
|
names(ci_raster) <- "CI"
|
||||||
safe_log(paste("Loaded weekly mosaic:", week_file))
|
safe_log(paste("Loaded weekly mosaic (single-file):", week_file))
|
||||||
return(ci_raster)
|
return(ci_raster)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
safe_log(paste("Error loading mosaic:", e$message), "ERROR")
|
safe_log(paste("Error loading mosaic:", e$message), "ERROR")
|
||||||
|
|
@ -96,6 +125,40 @@ load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# SECOND: Per-field architecture - store mosaic_dir path for later per-field loading
|
||||||
|
# Don't try to merge - just return the directory path so field-level functions can load per-field
|
||||||
|
if (dir.exists(mosaic_dir)) {
|
||||||
|
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
|
||||||
|
field_dirs <- field_dirs[field_dirs != ""]
|
||||||
|
|
||||||
|
# Check if any field has this week's mosaic
|
||||||
|
found_any <- FALSE
|
||||||
|
for (field in field_dirs) {
|
||||||
|
field_mosaic_path <- file.path(mosaic_dir, field, week_file)
|
||||||
|
if (file.exists(field_mosaic_path)) {
|
||||||
|
found_any <- TRUE
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found_any) {
|
||||||
|
safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year,
|
||||||
|
"- will load per-field on demand"))
|
||||||
|
# Return a special object that indicates per-field loading is needed
|
||||||
|
# Store the mosaic_dir path in the raster's metadata
|
||||||
|
dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA)
|
||||||
|
attr(dummy_raster, "per_field_dir") <- mosaic_dir
|
||||||
|
attr(dummy_raster, "week_file") <- week_file
|
||||||
|
attr(dummy_raster, "is_per_field") <- TRUE
|
||||||
|
return(dummy_raster)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# If we get here, no mosaic found
|
||||||
|
safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
# Function to prepare predictions with consistent naming and formatting
|
# Function to prepare predictions with consistent naming and formatting
|
||||||
prepare_predictions <- function(predictions, newdata) {
|
prepare_predictions <- function(predictions, newdata) {
|
||||||
return(predictions %>%
|
return(predictions %>%
|
||||||
|
|
@ -141,12 +204,16 @@ calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) {
|
||||||
# Extract field boundary
|
# Extract field boundary
|
||||||
field_vect <- field_boundaries_vect[i]
|
field_vect <- field_boundaries_vect[i]
|
||||||
|
|
||||||
# crop ci_raster with field_vect and use that for ci_values
|
# Load appropriate CI raster using helper function
|
||||||
cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE)
|
cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect)
|
||||||
|
|
||||||
# Extract CI values for this field using helper function
|
# Extract CI values for this field using helper function
|
||||||
|
if (!is.null(cropped_raster)) {
|
||||||
field_values <- extract_ci_values(cropped_raster, field_vect)
|
field_values <- extract_ci_values(cropped_raster, field_vect)
|
||||||
valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
|
valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
|
||||||
|
} else {
|
||||||
|
valid_values <- c()
|
||||||
|
}
|
||||||
|
|
||||||
# If all valid values are 0 (cloud), fill with NA row
|
# If all valid values are 0 (cloud), fill with NA row
|
||||||
if (length(valid_values) == 0 || all(valid_values == 0)) {
|
if (length(valid_values) == 0 || all(valid_values == 0)) {
|
||||||
|
|
@ -286,9 +353,18 @@ calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries)
|
||||||
# Extract field boundary
|
# Extract field boundary
|
||||||
field_vect <- field_boundaries_vect[i]
|
field_vect <- field_boundaries_vect[i]
|
||||||
|
|
||||||
# Extract CI values for both weeks (using helper to get CI band only)
|
# Load appropriate CI rasters using helper function
|
||||||
current_values <- extract_ci_values(current_ci, field_vect)
|
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
|
||||||
previous_values <- extract_ci_values(previous_ci, field_vect)
|
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
|
||||||
|
|
||||||
|
# Extract CI values for both weeks
|
||||||
|
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
|
||||||
|
current_values <- extract_ci_values(current_field_ci, field_vect)
|
||||||
|
previous_values <- extract_ci_values(previous_field_ci, field_vect)
|
||||||
|
} else {
|
||||||
|
current_values <- c()
|
||||||
|
previous_values <- c()
|
||||||
|
}
|
||||||
|
|
||||||
# Clean values
|
# Clean values
|
||||||
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
||||||
|
|
@ -569,8 +645,18 @@ calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundari
|
||||||
sub_field_name <- field_boundaries$sub_field[i]
|
sub_field_name <- field_boundaries$sub_field[i]
|
||||||
field_vect <- field_boundaries_vect[i]
|
field_vect <- field_boundaries_vect[i]
|
||||||
|
|
||||||
# Extract CI values for both weeks (using helper to get CI band only)
|
# Load appropriate CI rasters using helper function
|
||||||
current_values <- extract_ci_values(current_ci, field_vect)
|
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
|
||||||
|
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
|
||||||
|
|
||||||
|
# Extract CI values for both weeks
|
||||||
|
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
|
||||||
|
current_values <- extract_ci_values(current_field_ci, field_vect)
|
||||||
|
previous_values <- extract_ci_values(previous_field_ci, field_vect)
|
||||||
|
} else {
|
||||||
|
current_values <- c()
|
||||||
|
previous_values <- c()
|
||||||
|
}
|
||||||
previous_values <- extract_ci_values(previous_ci, field_vect)
|
previous_values <- extract_ci_values(previous_ci, field_vect)
|
||||||
|
|
||||||
# Clean values
|
# Clean values
|
||||||
|
|
@ -728,8 +814,17 @@ calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundarie
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract CI values for both weeks (using helper to get CI band only)
|
# Extract CI values for both weeks (using helper to get CI band only)
|
||||||
current_values <- extract_ci_values(current_ci, field_vect)
|
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
|
||||||
previous_values <- extract_ci_values(previous_ci, field_vect)
|
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
|
||||||
|
|
||||||
|
# Extract CI values for both weeks
|
||||||
|
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
|
||||||
|
current_values <- extract_ci_values(current_field_ci, field_vect)
|
||||||
|
previous_values <- extract_ci_values(previous_field_ci, field_vect)
|
||||||
|
} else {
|
||||||
|
current_values <- c()
|
||||||
|
previous_values <- c()
|
||||||
|
}
|
||||||
|
|
||||||
# Clean values
|
# Clean values
|
||||||
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
||||||
|
|
@ -810,8 +905,15 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
sub_field_name <- field_boundaries$sub_field[i]
|
sub_field_name <- field_boundaries$sub_field[i]
|
||||||
field_vect <- field_boundaries_vect[i]
|
field_vect <- field_boundaries_vect[i]
|
||||||
|
|
||||||
|
# Load appropriate CI raster using helper function
|
||||||
|
field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect)
|
||||||
|
|
||||||
# Extract CI values using helper function
|
# Extract CI values using helper function
|
||||||
ci_values <- extract_ci_values(ci_raster, field_vect)
|
if (!is.null(field_ci)) {
|
||||||
|
ci_values <- extract_ci_values(field_ci, field_vect)
|
||||||
|
} else {
|
||||||
|
ci_values <- c()
|
||||||
|
}
|
||||||
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
|
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
|
||||||
|
|
||||||
if (length(valid_values) > 1) {
|
if (length(valid_values) > 1) {
|
||||||
File diff suppressed because it is too large
Load diff
1240
r_app/parameters_project_OLD.R
Normal file
1240
r_app/parameters_project_OLD.R
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -4,24 +4,6 @@
|
||||||
# These functions support the creation of maps, charts and report elements
|
# These functions support the creation of maps, charts and report elements
|
||||||
# for the CI_report_dashboard_planet.Rmd document.
|
# for the CI_report_dashboard_planet.Rmd document.
|
||||||
|
|
||||||
#' Safe logging function that works whether log_message exists or not
|
|
||||||
#'
|
|
||||||
#' @param message The message to log
|
|
||||||
#' @param level The log level (default: "INFO")
|
|
||||||
#' @return NULL (used for side effects)
|
|
||||||
#'
|
|
||||||
safe_log <- function(message, level = "INFO") {
|
|
||||||
if (exists("log_message")) {
|
|
||||||
log_message(message, level)
|
|
||||||
} else {
|
|
||||||
if (level %in% c("ERROR", "WARNING")) {
|
|
||||||
warning(message)
|
|
||||||
} else {
|
|
||||||
message(message)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Creates a sub-chunk for use within RMarkdown documents
|
#' Creates a sub-chunk for use within RMarkdown documents
|
||||||
#'
|
#'
|
||||||
#' @param g A ggplot object to render in the sub-chunk
|
#' @param g A ggplot object to render in the sub-chunk
|
||||||
|
|
@ -244,14 +226,14 @@ ci_plot <- function(pivotName,
|
||||||
# Filter for the specific pivot
|
# Filter for the specific pivot
|
||||||
AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName)
|
AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName)
|
||||||
|
|
||||||
# Create crop masks for different timepoints using terra functions
|
# Per-field mosaics are already cropped to field boundaries, so use directly without cropping
|
||||||
singlePivot <- terra::crop(current_ci, pivotShape) %>% terra::mask(., pivotShape)
|
singlePivot <- current_ci
|
||||||
singlePivot_m1 <- terra::crop(ci_minus_1, pivotShape) %>% terra::mask(., pivotShape)
|
singlePivot_m1 <- ci_minus_1
|
||||||
singlePivot_m2 <- terra::crop(ci_minus_2, pivotShape) %>% terra::mask(., pivotShape)
|
singlePivot_m2 <- ci_minus_2
|
||||||
|
|
||||||
# Create difference maps
|
# Use difference maps directly (already field-specific)
|
||||||
abs_CI_last_week <- terra::crop(last_week_diff, pivotShape) %>% terra::mask(., pivotShape)
|
abs_CI_last_week <- last_week_diff
|
||||||
abs_CI_three_week <- terra::crop(three_week_diff, pivotShape) %>% terra::mask(., pivotShape)
|
abs_CI_three_week <- three_week_diff
|
||||||
|
|
||||||
# Get planting date
|
# Get planting date
|
||||||
planting_date <- harvesting_data %>%
|
planting_date <- harvesting_data %>%
|
||||||
|
|
@ -822,3 +804,144 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Aggregate per-field weekly mosaics into a farm-level mosaic
|
||||||
|
#'
|
||||||
|
#' Reads all per-field mosaic TIFs for a given week and merges them into a single farm-level mosaic.
|
||||||
|
#' This is used for overview maps in the report (Script 90).
|
||||||
|
#'
|
||||||
|
#' Per-field mosaics already have proper geospatial metadata (CRS, geotransform) from Script 10,
|
||||||
|
#' so terra can align them automatically without needing field boundaries or extent information.
|
||||||
|
#'
|
||||||
|
#' @param weekly_mosaic_dir Path to weekly_mosaic directory (e.g., "laravel_app/storage/app/{project}/weekly_mosaic")
|
||||||
|
#' @param target_week ISO week number (e.g., 52)
|
||||||
|
#' @param target_year ISO year (e.g., 2025)
|
||||||
|
#' @return SpatRaster object (5-band: R,G,B,NIR,CI) or NULL if no fields found
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' Per-field mosaics are located at: weekly_mosaic/{FIELD}/week_WW_YYYY.tif
|
||||||
|
#' This function:
|
||||||
|
#' 1. Finds all per-field subdirectories
|
||||||
|
#' 2. Loads each field's weekly mosaic
|
||||||
|
#' 3. Merges to a single raster using terra::mosaic() (alignment handled automatically by metadata)
|
||||||
|
#' 4. Returns combined 5-band raster for visualization
|
||||||
|
#'
|
||||||
|
aggregate_per_field_mosaics_to_farm_level <- function(
|
||||||
|
weekly_mosaic_dir,
|
||||||
|
target_week,
|
||||||
|
target_year
|
||||||
|
) {
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
|
||||||
|
# Validate directory exists
|
||||||
|
if (!dir.exists(weekly_mosaic_dir)) {
|
||||||
|
safe_log(paste("Weekly mosaic directory not found:", weekly_mosaic_dir), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Find all per-field subdirectories (non-TIF files at top level)
|
||||||
|
all_items <- list.files(weekly_mosaic_dir, full.names = FALSE)
|
||||||
|
field_dirs <- all_items[
|
||||||
|
!grepl("\\.tif$", all_items, ignore.case = TRUE) &
|
||||||
|
dir.exists(file.path(weekly_mosaic_dir, all_items))
|
||||||
|
]
|
||||||
|
|
||||||
|
if (length(field_dirs) == 0) {
|
||||||
|
safe_log(paste("No per-field directories found in", weekly_mosaic_dir), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(paste("Found", length(field_dirs), "field directories. Aggregating week",
|
||||||
|
sprintf("%02d", target_week), target_year), "INFO")
|
||||||
|
|
||||||
|
# Collect rasters from each field
|
||||||
|
raster_list <- list()
|
||||||
|
|
||||||
|
for (field_dir in field_dirs) {
|
||||||
|
field_mosaic_path <- file.path(
|
||||||
|
weekly_mosaic_dir,
|
||||||
|
field_dir,
|
||||||
|
paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif")
|
||||||
|
)
|
||||||
|
|
||||||
|
if (file.exists(field_mosaic_path)) {
|
||||||
|
tryCatch({
|
||||||
|
r <- terra::rast(field_mosaic_path)
|
||||||
|
raster_list[[field_dir]] <- r
|
||||||
|
safe_log(paste("Loaded mosaic for field:", field_dir), "DEBUG")
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Could not load mosaic for field", field_dir, ":", e$message), "WARNING")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length(raster_list) == 0) {
|
||||||
|
safe_log(paste("No field mosaics found for week", sprintf("%02d", target_week), target_year), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(paste("Successfully loaded mosaics for", length(raster_list), "fields"), "INFO")
|
||||||
|
|
||||||
|
# Create a SpatRasterCollection and mosaic using correct terra syntax
|
||||||
|
tryCatch({
|
||||||
|
rsrc <- terra::sprc(raster_list)
|
||||||
|
safe_log(paste("Created SpatRasterCollection with", length(raster_list), "rasters"), "DEBUG")
|
||||||
|
|
||||||
|
# Mosaic the rasters - this merges them into a single continuous raster
|
||||||
|
farm_mosaic <- terra::mosaic(rsrc)
|
||||||
|
|
||||||
|
safe_log(paste("Aggregated", length(raster_list), "per-field mosaics into farm-level mosaic"), "INFO")
|
||||||
|
|
||||||
|
# Verify mosaic was created successfully
|
||||||
|
if (is.null(farm_mosaic)) {
|
||||||
|
stop("mosaic() returned NULL")
|
||||||
|
}
|
||||||
|
|
||||||
|
return(farm_mosaic)
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error during mosaic creation:", e$message), "ERROR")
|
||||||
|
return(NULL)
|
||||||
|
})
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error aggregating per-field mosaics:", e$message), "ERROR")
|
||||||
|
return(NULL)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#' Get per-field mosaic path (new per-field architecture)
|
||||||
|
#'
|
||||||
|
#' Returns the path to a per-field weekly mosaic for direct visualization.
|
||||||
|
#' Replaces the old cropping workflow: now we load the field's own mosaic instead of cropping farm-level.
|
||||||
|
#'
|
||||||
|
#' @param weekly_mosaic_dir Path to weekly_mosaic directory
|
||||||
|
#' @param field_name Name of the field
|
||||||
|
#' @param target_week ISO week number
|
||||||
|
#' @param target_year ISO year
|
||||||
|
#' @return Path to field-specific mosaic TIF, or NULL if not found
|
||||||
|
#'
|
||||||
|
get_per_field_mosaic_path <- function(
|
||||||
|
weekly_mosaic_dir,
|
||||||
|
field_name,
|
||||||
|
target_week,
|
||||||
|
target_year
|
||||||
|
) {
|
||||||
|
|
||||||
|
path <- file.path(
|
||||||
|
weekly_mosaic_dir,
|
||||||
|
field_name,
|
||||||
|
paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif")
|
||||||
|
)
|
||||||
|
|
||||||
|
if (file.exists(path)) {
|
||||||
|
return(path)
|
||||||
|
} else {
|
||||||
|
safe_log(paste("Per-field mosaic not found for field", field_name,
|
||||||
|
"week", sprintf("%02d", target_week), target_year), "WARNING")
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,12 @@
|
||||||
# 1. Python: Download Planet images
|
# 1. Python: Download Planet images
|
||||||
# 2. R 10: Create master grid and split TIFFs
|
# 2. R 10: Create master grid and split TIFFs
|
||||||
# 3. R 20: CI Extraction
|
# 3. R 20: CI Extraction
|
||||||
# 4. R 21: Convert CI RDS to CSV
|
# 4. R 30: Interpolate growth model
|
||||||
# 5. R 30: Interpolate growth model
|
# 5. R 21: Convert CI RDS to CSV (uses Script 30 output)
|
||||||
# 6. Python 31: Harvest imminent weekly
|
# 6. Python 31: Harvest imminent weekly
|
||||||
# 7. R 40: Mosaic creation
|
# 7. R 40: Mosaic creation
|
||||||
# 8. R 80: Calculate KPIs
|
# 8. R 80: Calculate KPIs
|
||||||
|
# 9. R 90 (Agronomic) OR R 91 (Cane Supply): Generate Word Report
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# HOW TO RUN THIS SCRIPT
|
# HOW TO RUN THIS SCRIPT
|
||||||
|
|
@ -30,11 +31,12 @@
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
# *** EDIT THESE VARIABLES ***
|
# *** EDIT THESE VARIABLES ***
|
||||||
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
|
end_date <- as.Date("2026-02-04") # or specify: as.Date("2026-01-27") , Sys.Date()
|
||||||
offset <- 7 # days to look back
|
offset <- 7 # days to look back
|
||||||
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
|
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
|
||||||
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
|
|
||||||
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
|
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
|
||||||
|
migrate_legacy_format <- TRUE # Set to TRUE to migrate from old format (merged_tif/merged_tif_8b) to new format (field_tiles)
|
||||||
|
# *** NOTE: data_source is now unified - all projects use field_tiles after migration ***
|
||||||
# ***************************
|
# ***************************
|
||||||
|
|
||||||
# Format dates
|
# Format dates
|
||||||
|
|
@ -48,23 +50,35 @@ pipeline_success <- TRUE
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")
|
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")
|
||||||
|
|
||||||
# Check Script 10 outputs (tiled splits)
|
# Check Script 10 outputs (field_tiles with per-field TIFFs)
|
||||||
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
|
# Script 10 outputs to field_tiles/{field_id}/{date}.tif
|
||||||
tiles_dates <- if (dir.exists(tiles_dir)) {
|
field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
|
||||||
list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
|
tiles_dates <- c()
|
||||||
} else {
|
if (dir.exists(field_tiles_dir)) {
|
||||||
c()
|
# Get all field subdirectories
|
||||||
|
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||||
|
if (length(field_dirs) > 0) {
|
||||||
|
# Get unique dates from all field directories
|
||||||
|
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
tiles_dates <- unique(sub("\\.tif$", "", all_files))
|
||||||
}
|
}
|
||||||
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
|
}
|
||||||
|
cat(sprintf("Script 10: %d dates already tiled (field_tiles/)\n", length(tiles_dates)))
|
||||||
|
|
||||||
# Check Script 20 outputs (CI extraction) - daily RDS files
|
# Check Script 20 outputs (CI extraction) - per-field CI TIFFs at field_tiles_CI/{FIELD}/{DATE}.tif
|
||||||
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
|
# NOTE: This is the NEW per-field format, not the old extracted_ci/ flat format
|
||||||
ci_files <- if (dir.exists(ci_daily_dir)) {
|
field_tiles_ci_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
|
||||||
list.files(ci_daily_dir, pattern = "\\.rds$")
|
ci_tiff_dates <- c()
|
||||||
} else {
|
if (dir.exists(field_tiles_ci_dir)) {
|
||||||
c()
|
# Get all field subdirectories
|
||||||
|
field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE)
|
||||||
|
if (length(field_dirs) > 0) {
|
||||||
|
# Get unique dates from all field directories (dates that have been processed through Script 20)
|
||||||
|
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
ci_tiff_dates <- unique(sub("\\.tif$", "", all_files))
|
||||||
}
|
}
|
||||||
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
|
}
|
||||||
|
cat(sprintf("Script 20: %d dates already processed (field_tiles_CI/)\n", length(ci_tiff_dates)))
|
||||||
|
|
||||||
# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this
|
# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this
|
||||||
# Instead, check if CI RDS files exist - if they do, 21 should also run
|
# Instead, check if CI RDS files exist - if they do, 21 should also run
|
||||||
|
|
@ -72,7 +86,7 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
|
||||||
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
|
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
|
||||||
|
|
||||||
# Check Script 40 outputs (mosaics in weekly_tile_max/5x5)
|
# Check Script 40 outputs (mosaics in weekly_tile_max/5x5)
|
||||||
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
|
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
|
||||||
mosaic_files <- if (dir.exists(mosaic_dir)) {
|
mosaic_files <- if (dir.exists(mosaic_dir)) {
|
||||||
list.files(mosaic_dir, pattern = "\\.tif$")
|
list.files(mosaic_dir, pattern = "\\.tif$")
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -90,9 +104,9 @@ kpi_files <- if (dir.exists(kpi_dir)) {
|
||||||
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
|
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
|
||||||
|
|
||||||
# Determine if scripts should run based on outputs
|
# Determine if scripts should run based on outputs
|
||||||
skip_10 <- length(tiles_dates) > 0 && !force_rerun
|
skip_10 <- FALSE # Script 10 should always run to pick up any new merged_tif files
|
||||||
skip_20 <- length(ci_files) > 0 && !force_rerun
|
skip_20 <- FALSE # Script 20 always runs to process dates in the current window (per-field format)
|
||||||
skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped
|
skip_21 <- FALSE # Skip 21 only if 20 is skipped
|
||||||
skip_40 <- length(mosaic_files) > 0 && !force_rerun
|
skip_40 <- length(mosaic_files) > 0 && !force_rerun
|
||||||
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
|
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
|
||||||
|
|
||||||
|
|
@ -110,25 +124,50 @@ cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Setup paths
|
# Setup paths
|
||||||
base_path <- file.path("laravel_app", "storage", "app", project_dir)
|
base_path <- file.path("laravel_app", "storage", "app", project_dir)
|
||||||
merged_tifs_dir <- file.path(base_path, data_source)
|
|
||||||
|
|
||||||
# Get existing dates from raw TIFFs
|
# Always check merged_tif/ for existing downloads (both modes)
|
||||||
|
# merged_tif/ is where Python downloads go, before Script 10 splits to field_tiles/
|
||||||
|
merged_tifs_dir <- file.path(base_path, "merged_tif")
|
||||||
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
|
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
|
||||||
|
|
||||||
# Get existing dates from tiles (better indicator of completion)
|
if (migrate_legacy_format) {
|
||||||
existing_tile_dates <- tiles_dates
|
cat(sprintf(" Migration mode: Checking merged_tif/ for existing dates\n"))
|
||||||
|
} else {
|
||||||
|
cat(sprintf(" Production mode: Checking merged_tif/ and field_tiles/ for existing dates\n"))
|
||||||
|
}
|
||||||
|
|
||||||
# Find missing dates in the window
|
# Find missing dates in the window
|
||||||
|
# Window: from (end_date - offset) to end_date
|
||||||
|
# Example: if end_date=2026-02-04 and offset=7, window is 2026-01-28 to 2026-02-04 (8 dates)
|
||||||
start_date <- end_date - offset
|
start_date <- end_date - offset
|
||||||
date_seq <- seq(start_date, end_date, by = "day")
|
date_seq <- seq(start_date, end_date, by = "day")
|
||||||
target_dates <- format(date_seq, "%Y-%m-%d")
|
target_dates <- format(date_seq, "%Y-%m-%d")
|
||||||
|
|
||||||
# Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
|
# Also check field_tiles/ for dates that have already been processed through Script 10
|
||||||
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
|
# field_tiles/ contains {field_id}/{date}.tif files - check which dates are present
|
||||||
|
field_tiles_dir <- file.path(base_path, "field_tiles")
|
||||||
|
processed_dates <- c()
|
||||||
|
if (dir.exists(field_tiles_dir)) {
|
||||||
|
# Get all field subdirectories
|
||||||
|
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||||
|
if (length(field_dirs) > 0) {
|
||||||
|
# Get unique dates from all field directories
|
||||||
|
all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
processed_dates <- unique(sub("\\.tif$", "", all_files))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
|
# Combine existing dates from both merged_tif and field_tiles
|
||||||
cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
|
all_existing_dates <- unique(c(existing_tiff_dates, processed_dates))
|
||||||
|
|
||||||
|
# Compare: which target dates don't exist in merged_tif/ or field_tiles/?
|
||||||
|
missing_dates <- target_dates[!(target_dates %in% all_existing_dates)]
|
||||||
|
|
||||||
|
cat(sprintf(" Existing dates in merged_tif/: %d\n", length(existing_tiff_dates)))
|
||||||
|
cat(sprintf(" Processed dates in field_tiles/: %d\n", length(processed_dates)))
|
||||||
|
cat(sprintf(" Target window: %s to %s (%d dates)\n", start_date, end_date, length(target_dates)))
|
||||||
|
cat(sprintf(" Missing dates to download: %d\n", length(missing_dates)))
|
||||||
|
|
||||||
# Download each missing date
|
# Download each missing date
|
||||||
download_count <- 0
|
download_count <- 0
|
||||||
|
|
@ -170,6 +209,51 @@ tryCatch({
|
||||||
pipeline_success <<- FALSE
|
pipeline_success <<- FALSE
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# MIGRATION: Move legacy format files to new format (if enabled)
|
||||||
|
# ==============================================================================
|
||||||
|
if (pipeline_success && migrate_legacy_format) {
|
||||||
|
cat("\n========== MIGRATION: MOVING LEGACY FORMAT FILES ==========\n")
|
||||||
|
tryCatch({
|
||||||
|
base_path <- file.path("laravel_app", "storage", "app", project_dir)
|
||||||
|
|
||||||
|
# PART 1: Move merged_tif files to field_tiles
|
||||||
|
merged_tif_old <- file.path(base_path, "merged_tif")
|
||||||
|
field_tiles_new <- file.path(base_path, "field_tiles")
|
||||||
|
|
||||||
|
if (dir.exists(merged_tif_old)) {
|
||||||
|
tif_files <- list.files(merged_tif_old, pattern = "\\.tif$", full.names = TRUE)
|
||||||
|
if (length(tif_files) > 0) {
|
||||||
|
dir.create(field_tiles_new, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
for (file in tif_files) {
|
||||||
|
file.rename(file, file.path(field_tiles_new, basename(file)))
|
||||||
|
}
|
||||||
|
cat(sprintf("✓ Moved %d TIFF files from merged_tif/ to field_tiles/\n", length(tif_files)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# PART 2: Move merged_tif_final files (CI) to field_tiles_CI
|
||||||
|
merged_tif_final_old <- file.path(base_path, "merged_tif_final")
|
||||||
|
field_tiles_ci_new <- file.path(base_path, "field_tiles_CI")
|
||||||
|
|
||||||
|
if (dir.exists(merged_tif_final_old)) {
|
||||||
|
ci_files <- list.files(merged_tif_final_old, pattern = "\\.tif$", full.names = TRUE)
|
||||||
|
if (length(ci_files) > 0) {
|
||||||
|
dir.create(field_tiles_ci_new, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
for (file in ci_files) {
|
||||||
|
file.rename(file, file.path(field_tiles_ci_new, basename(file)))
|
||||||
|
}
|
||||||
|
cat(sprintf("✓ Moved %d CI TIFF files from merged_tif_final/ to field_tiles_CI/\n", length(ci_files)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("✓ Migration completed successfully\n")
|
||||||
|
}, error = function(e) {
|
||||||
|
cat("✗ Error in migration:", e$message, "\n")
|
||||||
|
pipeline_success <<- FALSE
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
|
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
@ -178,20 +262,38 @@ if (pipeline_success && !skip_10) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Set environment variables for the script (Script 10 uses these for filtering)
|
# Set environment variables for the script (Script 10 uses these for filtering)
|
||||||
assign("PROJECT", project_dir, envir = .GlobalEnv)
|
assign("PROJECT", project_dir, envir = .GlobalEnv)
|
||||||
|
assign("end_date", end_date, envir = .GlobalEnv)
|
||||||
|
assign("offset", offset, envir = .GlobalEnv)
|
||||||
|
|
||||||
|
# Count field_tiles/ dates BEFORE Script 10 runs
|
||||||
|
field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
|
||||||
|
field_dirs_before <- c()
|
||||||
|
if (dir.exists(field_tiles_dir)) {
|
||||||
|
field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||||
|
if (length(field_dirs_tmp) > 0) {
|
||||||
|
all_files_before <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
field_dirs_before <- unique(sub("\\.tif$", "", all_files_before))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# Suppress verbose per-date output, show only summary
|
# Suppress verbose per-date output, show only summary
|
||||||
sink(nullfile())
|
sink(nullfile())
|
||||||
source("r_app/10_create_master_grid_and_split_tiffs.R")
|
source("r_app/10_create_per_field_tiffs.R")
|
||||||
sink()
|
sink()
|
||||||
|
|
||||||
# Verify output
|
# Count field_tiles/ dates AFTER Script 10 runs
|
||||||
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
|
field_dirs_after <- c()
|
||||||
if (dir.exists(tiles_dir)) {
|
if (dir.exists(field_tiles_dir)) {
|
||||||
subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
|
field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||||
cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs)))
|
if (length(field_dirs_tmp) > 0) {
|
||||||
} else {
|
all_files_after <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
cat("✓ Script 10 completed\n")
|
field_dirs_after <- unique(sub("\\.tif$", "", all_files_after))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate newly added dates
|
||||||
|
newly_added <- length(field_dirs_after) - length(field_dirs_before)
|
||||||
|
cat(sprintf("✓ Script 10 completed - processed %d new dates (total: %d dates in field_tiles/)\n", max(0, newly_added), length(field_dirs_after)))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
sink()
|
sink()
|
||||||
cat("✗ Error in Script 10:", e$message, "\n")
|
cat("✗ Error in Script 10:", e$message, "\n")
|
||||||
|
|
@ -211,19 +313,71 @@ if (pipeline_success && !skip_20) {
|
||||||
assign("end_date", end_date, envir = .GlobalEnv)
|
assign("end_date", end_date, envir = .GlobalEnv)
|
||||||
assign("offset", offset, envir = .GlobalEnv)
|
assign("offset", offset, envir = .GlobalEnv)
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
source("r_app/20_ci_extraction.R")
|
# If in migration mode, find all dates that need processing
|
||||||
main() # Call main() to execute the script with the environment variables
|
if (migrate_legacy_format) {
|
||||||
|
cat("Migration mode: Finding all dates in field_tiles/ that need CI processing...\n")
|
||||||
|
|
||||||
# Verify CI output was created
|
# Get all dates from field_tiles/
|
||||||
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
|
field_tiles_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles")
|
||||||
if (dir.exists(ci_daily_dir)) {
|
all_dates_in_tiles <- c()
|
||||||
files <- list.files(ci_daily_dir, pattern = "\\.rds$")
|
if (dir.exists(field_tiles_dir_check)) {
|
||||||
cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files)))
|
field_dirs_tmp <- list.dirs(field_tiles_dir_check, full.names = TRUE, recursive = FALSE)
|
||||||
} else {
|
if (length(field_dirs_tmp) > 0) {
|
||||||
cat("✓ Script 20 completed\n")
|
all_files_tmp <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
all_dates_in_tiles <- unique(sub("\\.tif$", "", all_files_tmp))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get dates already processed in field_tiles_CI/
|
||||||
|
field_tiles_ci_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
|
||||||
|
processed_ci_dates <- c()
|
||||||
|
if (dir.exists(field_tiles_ci_check)) {
|
||||||
|
field_dirs_ci <- list.dirs(field_tiles_ci_check, full.names = TRUE, recursive = FALSE)
|
||||||
|
if (length(field_dirs_ci) > 0) {
|
||||||
|
all_files_ci <- list.files(field_dirs_ci, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
processed_ci_dates <- unique(sub("\\.tif$", "", all_files_ci))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get dates already in old RDS format
|
||||||
|
old_rds_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
|
||||||
|
processed_rds_dates <- c()
|
||||||
|
if (dir.exists(old_rds_dir)) {
|
||||||
|
rds_files <- list.files(old_rds_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$")
|
||||||
|
processed_rds_dates <- unique(sub("\\.rds$", "", rds_files))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Find dates missing from either location
|
||||||
|
dates_missing_ci <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_ci_dates)]
|
||||||
|
dates_missing_rds <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_rds_dates)]
|
||||||
|
dates_to_process_migration <- sort(unique(c(dates_missing_ci, dates_missing_rds)))
|
||||||
|
|
||||||
|
cat(sprintf(" All dates in field_tiles/: %d\n", length(all_dates_in_tiles)))
|
||||||
|
cat(sprintf(" Already in field_tiles_CI/: %d\n", length(processed_ci_dates)))
|
||||||
|
cat(sprintf(" Already in extracted_ci/daily_vals/: %d\n", length(processed_rds_dates)))
|
||||||
|
cat(sprintf(" Dates needing processing: %d\n", length(dates_to_process_migration)))
|
||||||
|
|
||||||
|
if (length(dates_to_process_migration) > 0) {
|
||||||
|
assign("dates_to_process", dates_to_process_migration, envir = .GlobalEnv)
|
||||||
|
cat(sprintf(" Will process: %s to %s\n", dates_to_process_migration[1], dates_to_process_migration[length(dates_to_process_migration)]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
source("r_app/20_ci_extraction_per_field.R")
|
||||||
|
main()
|
||||||
|
|
||||||
|
# Verify output
|
||||||
|
field_tiles_ci_verify <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI")
|
||||||
|
tiff_count <- 0
|
||||||
|
if (dir.exists(field_tiles_ci_verify)) {
|
||||||
|
field_dirs_verify <- list.dirs(field_tiles_ci_verify, full.names = TRUE, recursive = FALSE)
|
||||||
|
if (length(field_dirs_verify) > 0) {
|
||||||
|
all_files_verify <- list.files(field_dirs_verify, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
|
||||||
|
tiff_count <- length(all_files_verify)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cat(sprintf("✓ Script 20 completed - %d CI TIFFs in field_tiles_CI/\n", tiff_count))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
cat("✗ Error in Script 20:", e$message, "\n")
|
cat("✗ Error in Script 20:", e$message, "\n")
|
||||||
pipeline_success <<- FALSE
|
pipeline_success <<- FALSE
|
||||||
|
|
@ -233,7 +387,35 @@ if (pipeline_success && !skip_20) {
|
||||||
}
|
}
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# SCRIPT 21: CONVERT CI RDS TO CSV
|
# SCRIPT 30: INTERPOLATE GROWTH MODEL
|
||||||
|
# ==============================================================================
|
||||||
|
if (pipeline_success) {
|
||||||
|
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
|
||||||
|
tryCatch({
|
||||||
|
# Set environment variables for the script
|
||||||
|
assign("end_date", end_date, envir = .GlobalEnv)
|
||||||
|
assign("offset", offset, envir = .GlobalEnv)
|
||||||
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
|
|
||||||
|
source("r_app/30_interpolate_growth_model.R")
|
||||||
|
main() # Call main() to execute the script with the environment variables
|
||||||
|
|
||||||
|
# Verify interpolated output
|
||||||
|
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
|
||||||
|
if (dir.exists(growth_dir)) {
|
||||||
|
files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
|
||||||
|
cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
|
||||||
|
} else {
|
||||||
|
cat("✓ Script 30 completed\n")
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
cat("✗ Error in Script 30:", e$message, "\n")
|
||||||
|
pipeline_success <<- FALSE
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# SCRIPT 21: CONVERT CI RDS TO CSV (uses Script 30 output)
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
if (pipeline_success && !skip_21) {
|
if (pipeline_success && !skip_21) {
|
||||||
cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
|
cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
|
||||||
|
|
@ -262,35 +444,6 @@ if (pipeline_success && !skip_21) {
|
||||||
cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
|
cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# SCRIPT 30: INTERPOLATE GROWTH MODEL
|
|
||||||
# ==============================================================================
|
|
||||||
if (pipeline_success) {
|
|
||||||
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
|
|
||||||
tryCatch({
|
|
||||||
# Set environment variables for the script
|
|
||||||
assign("end_date", end_date, envir = .GlobalEnv)
|
|
||||||
assign("offset", offset, envir = .GlobalEnv)
|
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
source("r_app/30_interpolate_growth_model.R")
|
|
||||||
main() # Call main() to execute the script with the environment variables
|
|
||||||
|
|
||||||
# Verify interpolated output
|
|
||||||
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
|
|
||||||
if (dir.exists(growth_dir)) {
|
|
||||||
files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
|
|
||||||
cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
|
|
||||||
} else {
|
|
||||||
cat("✓ Script 30 completed\n")
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
cat("✗ Error in Script 30:", e$message, "\n")
|
|
||||||
pipeline_success <<- FALSE
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# PYTHON 31: HARVEST IMMINENT WEEKLY
|
# PYTHON 31: HARVEST IMMINENT WEEKLY
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
@ -335,9 +488,8 @@ if (pipeline_success && !skip_40) {
|
||||||
assign("end_date", end_date, envir = .GlobalEnv)
|
assign("end_date", end_date, envir = .GlobalEnv)
|
||||||
assign("offset", offset, envir = .GlobalEnv)
|
assign("offset", offset, envir = .GlobalEnv)
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
source("r_app/40_mosaic_creation.R")
|
source("r_app/40_mosaic_creation_per_field.R")
|
||||||
main() # Call main() to execute the script with the environment variables
|
main() # Call main() to execute the script with the environment variables
|
||||||
|
|
||||||
# Verify mosaic output
|
# Verify mosaic output
|
||||||
|
|
@ -368,7 +520,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
|
||||||
assign("end_date_str", end_date_str, envir = .GlobalEnv)
|
assign("end_date_str", end_date_str, envir = .GlobalEnv)
|
||||||
assign("offset", offset, envir = .GlobalEnv)
|
assign("offset", offset, envir = .GlobalEnv)
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
source("r_app/80_calculate_kpis.R")
|
source("r_app/80_calculate_kpis.R")
|
||||||
main() # Call main() to execute the script with the environment variables
|
main() # Call main() to execute the script with the environment variables
|
||||||
|
|
@ -389,6 +540,92 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# SCRIPT 90/91: GENERATE WORD REPORTS (CLIENT-TYPE SPECIFIC)
|
||||||
|
# ==============================================================================
|
||||||
|
if (pipeline_success) {
|
||||||
|
# Determine client type from project mapping
|
||||||
|
source("r_app/parameters_project.R")
|
||||||
|
source("r_app/00_common_utils.R")
|
||||||
|
client_type <- get_client_type(project_dir)
|
||||||
|
|
||||||
|
if (client_type == "agronomic_support") {
|
||||||
|
# SCRIPT 90: Agronomic Support Report (for Aura)
|
||||||
|
cat("\n========== RUNNING SCRIPT 90: AGRONOMIC SUPPORT REPORT (WORD) ==========\n")
|
||||||
|
tryCatch({
|
||||||
|
# Render the R Markdown file with parameters
|
||||||
|
# The Rmd file will load parameters_project and utilities internally
|
||||||
|
rmarkdown::render(
|
||||||
|
"r_app/90_CI_report_with_kpis_simple.Rmd",
|
||||||
|
params = list(
|
||||||
|
data_dir = project_dir,
|
||||||
|
report_date = end_date,
|
||||||
|
mail_day = "Monday",
|
||||||
|
borders = TRUE,
|
||||||
|
ci_plot_type = "both",
|
||||||
|
colorblind_friendly = FALSE,
|
||||||
|
facet_by_season = FALSE,
|
||||||
|
x_axis_unit = "days"
|
||||||
|
),
|
||||||
|
output_file = sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str),
|
||||||
|
output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"),
|
||||||
|
quiet = FALSE,
|
||||||
|
knit_root_dir = getwd()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify report was created
|
||||||
|
report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports",
|
||||||
|
sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str))
|
||||||
|
if (file.exists(report_file)) {
|
||||||
|
cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", basename(report_file)))
|
||||||
|
} else {
|
||||||
|
cat("⚠ Script 90 report file not found - check rendering\n")
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
cat("✗ Error in Script 90:", e$message, "\n")
|
||||||
|
print(e)
|
||||||
|
pipeline_success <<- FALSE
|
||||||
|
})
|
||||||
|
} else if (client_type == "cane_supply") {
|
||||||
|
# SCRIPT 91: Cane Supply Report (for Angata, Chemba, Xinavane, ESA)
|
||||||
|
cat("\n========== RUNNING SCRIPT 91: CANE SUPPLY REPORT (WORD) ==========\n")
|
||||||
|
tryCatch({
|
||||||
|
# Render the R Markdown file with parameters
|
||||||
|
# The Rmd file will load parameters_project and utilities internally
|
||||||
|
rmarkdown::render(
|
||||||
|
"r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||||
|
params = list(
|
||||||
|
data_dir = project_dir,
|
||||||
|
report_date = end_date,
|
||||||
|
mail_day = "Monday",
|
||||||
|
borders = TRUE,
|
||||||
|
ci_plot_type = "both",
|
||||||
|
colorblind_friendly = FALSE,
|
||||||
|
facet_by_season = FALSE,
|
||||||
|
x_axis_unit = "days"
|
||||||
|
),
|
||||||
|
output_file = sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str),
|
||||||
|
output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"),
|
||||||
|
quiet = FALSE,
|
||||||
|
knit_root_dir = getwd()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify report was created
|
||||||
|
report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports",
|
||||||
|
sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str))
|
||||||
|
if (file.exists(report_file)) {
|
||||||
|
cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", basename(report_file)))
|
||||||
|
} else {
|
||||||
|
cat("⚠ Script 91 report file not found - check rendering\n")
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
cat("✗ Error in Script 91:", e$message, "\n")
|
||||||
|
print(e)
|
||||||
|
pipeline_success <<- FALSE
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# SUMMARY
|
# SUMMARY
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
@ -401,4 +638,4 @@ if (pipeline_success) {
|
||||||
} else {
|
} else {
|
||||||
cat("Status: ✗ Pipeline failed - check errors above\n")
|
cat("Status: ✗ Pipeline failed - check errors above\n")
|
||||||
}
|
}
|
||||||
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")
|
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 30 → R 21 → Python 31 → R 40 → R 80 → R 90/91\n")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue