diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index 4175476..0ccc373 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -122,7 +122,7 @@
## Documentation & File Creation Policy
**IMPORTANT: Minimize markdown file creation to reduce repo clutter**
-- **Do NOT create** README.md, START_HERE.md, QUICK_START.md, INDEX.md automatically
+- **Do NOT create** README.md, START_HERE.md, QUICK_START.md, INDEX.md, or any other .md files automatically
- **Only create .md files when:**
- User explicitly requests it
- A single index/guide for an entire folder (ONE per folder max)
diff --git a/figure/sub_chunk_8433-1.png b/figure/sub_chunk_8433-1.png
deleted file mode 100644
index 160a733..0000000
Binary files a/figure/sub_chunk_8433-1.png and /dev/null differ
diff --git a/figure/sub_chunk_9696-1.png b/figure/sub_chunk_9696-1.png
deleted file mode 100644
index b4564b8..0000000
Binary files a/figure/sub_chunk_9696-1.png and /dev/null differ
diff --git a/python_app/01_planet_download.ipynb b/python_app/01_planet_download.ipynb
index 70b763a..56a404f 100644
--- a/python_app/01_planet_download.ipynb
+++ b/python_app/01_planet_download.ipynb
@@ -54,7 +54,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"id": "eb1fb662-0e25-4ca9-8317-c6953290842b",
"metadata": {},
"outputs": [],
@@ -79,30 +79,30 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "060396e0-e5ee-4b54-b211-5d8bfcba167f",
"metadata": {},
"outputs": [],
"source": [
"#project = 'Mkulazi_trail' #or xinavane or chemba_test_8b\n",
"#project = 'xinavane' #or xinavane or chemba_test_8b\n",
- "project = 'aura' #or xinavane or chemba_test_8b\n"
+ "project = 'angata' #or xinavane or chemba_test_8b\n"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 35,
"id": "c9f79e81-dff8-4109-8d26-6c423142dcf2",
"metadata": {},
"outputs": [],
"source": [
"# Adjust the number of days needed\n",
- "days = 7"
+ "days = 5"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 36,
"id": "e18bdf8f-be4b-44ab-baaa-de5de60d92cb",
"metadata": {},
"outputs": [],
@@ -124,7 +124,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 37,
"id": "3f7c8e04-4569-457b-b39d-283582c4ba36",
"metadata": {},
"outputs": [],
@@ -149,7 +149,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 38,
"id": "244b5752-4f02-4347-9278-f6a0a46b88f4",
"metadata": {},
"outputs": [],
@@ -237,7 +237,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 39,
"id": "848dc773-70d6-4ae6-b05c-d6ebfb41624d",
"metadata": {},
"outputs": [
@@ -247,13 +247,11 @@
"text": [
"Monthly time windows:\n",
"\n",
- "2025-12-12\n",
- "2025-12-13\n",
- "2025-12-14\n",
- "2025-12-15\n",
- "2025-12-16\n",
- "2025-12-17\n",
- "2025-12-18\n"
+ "2026-01-02\n",
+ "2026-01-03\n",
+ "2026-01-04\n",
+ "2026-01-05\n",
+ "2026-01-06\n"
]
}
],
@@ -295,7 +293,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 40,
"id": "c803e373-2567-4233-af7d-0d2d6f7d4f8e",
"metadata": {},
"outputs": [],
@@ -305,7 +303,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 41,
"id": "dc24d54e-2272-4f30-bcf5-4d8fc381915c",
"metadata": {},
"outputs": [],
@@ -315,7 +313,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 42,
"id": "cd071b42-d0cd-4e54-8f88-ad1a339748e3",
"metadata": {},
"outputs": [],
@@ -325,7 +323,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 43,
"id": "301d12e4-e47a-4034-aec0-aa5673e64935",
"metadata": {},
"outputs": [
@@ -333,14 +331,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Area bounding box: BBox(((35.16365354880403, -0.169202795759772), (35.252909781631075, -0.085689722918499)), crs=CRS('4326'))\n",
+ "Area bounding box: BBox(((34.43227940231912, -1.412210768346171), (34.9511271057322, -0.7403944081102)), crs=CRS('4326'))\n",
"\n"
]
}
],
"source": [
"bbox_splitter = BBoxSplitter(\n",
- " shapely_geometries, CRS.WGS84, (5, 5), reduce_bbox_sizes=True\n",
+ " shapely_geometries, CRS.WGS84, (20, 20), reduce_bbox_sizes=True\n",
") # bounding box will be split into a grid of 5x4 bounding boxes\n",
"\n",
"# based on https://github.com/sentinel-hub/sentinelhub-py/blob/master/examples/large_area_utilities.ipynb \n",
@@ -353,20 +351,20 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 44,
"id": "431f6856-8d7e-4868-b627-20deeb47d77e",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
- ""
+ ""
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 14,
+ "execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
@@ -379,7 +377,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 45,
"id": "18655785",
"metadata": {},
"outputs": [],
@@ -400,7 +398,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 46,
"id": "a6fc418f",
"metadata": {},
"outputs": [],
@@ -415,7 +413,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 47,
"id": "ebc416be",
"metadata": {},
"outputs": [
@@ -423,10 +421,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "['2025-12-12', '2025-12-13', '2025-12-14', '2025-12-15', '2025-12-16', '2025-12-17']\n",
- "Total slots: 7\n",
- "Available slots: 6\n",
- "Excluded slots due to empty dates: 1\n"
+ "[]\n",
+ "Total slots: 5\n",
+ "Available slots: 0\n",
+ "Excluded slots due to empty dates: 5\n"
]
}
],
diff --git a/python_app/download_8band_pu_optimized.py b/python_app/download_8band_pu_optimized.py
index f03f118..4fb5c4f 100644
--- a/python_app/download_8band_pu_optimized.py
+++ b/python_app/download_8band_pu_optimized.py
@@ -4,27 +4,41 @@ Planet 4-Band Download Script - PU-Optimized (RGB+NIR, Cloud-Masked, uint16)
============================================================================
Strategy: Minimize Processing Units using three techniques:
- 1. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
- → Cuts data transfer by ~60% (4 bands uint16 vs 9 bands FLOAT32)
- 2. Dynamically reduced bounding boxes (reduce_bbox_sizes=True)
- → Shrinks tiles to fit field geometry boundaries, reducing wasted pixels
- 3. Date availability filtering + geometry-aware grid
- → Skips empty dates and non-field areas
+ 1. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
+ → Cuts data transfer by ~60% (4 bands uint16 vs 9 bands FLOAT32)
+ 2. Dynamically reduced bounding boxes (reduce_bbox_sizes=True)
+ → Shrinks tiles to fit field geometry boundaries, reducing wasted pixels
+ 3. Date availability filtering + geometry-aware grid
+ → Skips empty dates and non-field areas
Usage:
- python download_8band_pu_optimized.py [PROJECT] [--date DATE]
-
-Example:
- python download_8band_pu_optimized.py angata --date 2024-01-15
- python download_8band_pu_optimized.py chemba # Uses today's date
+ python download_8band_pu_optimized.py [PROJECT] [OPTIONS]
+
+Arguments:
+ PROJECT Project name (angata, chemba, xinavane, etc.) [required]
+
+Options:
+ --date DATE Date to download (YYYY-MM-DD). Default: today
+ --resolution RES Resolution in meters (default: 3)
+ --skip-merge Skip merge step (download only, keep individual tiles)
+ --cleanup Delete intermediate single_images folder after merge
+ --clear-singles Clear single_images_8b folder before download
+ --clear-merged Clear merged_tif_8b and merged_virtual_8b folders before download
+ --clear-all Clear all output folders (singles, merged, virtual) before download
+
+Examples:
+ python download_8band_pu_optimized.py angata --date 2024-01-15
+ python download_8band_pu_optimized.py chemba # Uses today's date
+ python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
+ python download_8band_pu_optimized.py angata --clear-all --resolution 5
Cost Model:
- - 4-band uint16 with cloud masking: ~50% lower cost than 9-band FLOAT32
- - Reduced bbox sizes: ~10-20% lower cost due to smaller average tile size
- - Total expected PU: ~1,500-2,000 per date (vs 5,865 with 9-band approach)
- - Requests: Slightly higher (~50-60 tiles) but within 700k budget
-
- Expected result: ~75% PU savings with dynamic geometry-fitted grid
+ - 4-band uint16 with cloud masking: ~50% lower cost than 9-band FLOAT32
+ - Reduced bbox sizes: ~10-20% lower cost due to smaller average tile size
+ - Total expected PU: ~1,500-2,000 per date (vs 5,865 with 9-band approach)
+ - Requests: Slightly higher (~50-60 tiles) but within 700k budget
+
+ Expected result: ~75% PU savings with dynamic geometry-fitted grid
"""
import os
@@ -73,24 +87,35 @@ def setup_config():
# ============================================================================
-# EVALSCRIPT: 5 bands (RGB + NIR + UDM1) - raw passthrough, uint16 output
+# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, scaled reflectance output (float; NaN for masked pixels)
# ============================================================================
-EVALSCRIPT_5BAND_RAW = """
+EVALSCRIPT_4BAND_MASKED = """
//VERSION=3
function setup() {
return {
input: [{
- bands: ["red", "green", "blue", "nir", "udm1"]
+ bands: ["red", "green", "blue", "nir", "udm1"],
+ units: "DN"
}],
output: {
- bands: 5,
- sampleType: "UINT16"
+ bands: 4
}
};
}
function evaluatePixel(sample) {
- return [sample.red, sample.green, sample.blue, sample.nir, sample.udm1];
+ // Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
+ // This reduces output pixels and avoids NaN interpolation on client side
+ if (sample.udm1 == 0) {
+ // Scale reflectance: DN → [0, 1] range
+ var scaledRed = 2.5 * sample.red / 10000;
+ var scaledGreen = 2.5 * sample.green / 10000;
+ var scaledBlue = 2.5 * sample.blue / 10000;
+ var scaledNIR = 2.5 * sample.nir / 10000;
+ return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
+ } else {
+ return [NaN, NaN, NaN, NaN];
+ }
}
"""
@@ -152,17 +177,18 @@ def create_optimal_grid_with_filtering(
nx_base = max(1, int(np.ceil(width_m / max_size_m)))
ny_base = max(1, int(np.ceil(height_m / max_size_m)))
- # Use EXTRA FINE grid (3x multiplier) with reduce_bbox_sizes=True
- # This creates many more, smaller tiles that hug geometry boundaries very tightly
- # 3x multiplier = 24×30 theoretical tiles → ~150-180 active after reduce_bbox_sizes
- nx_fine = nx_base * 3
- ny_fine = ny_base * 3
+    # Use FINE grid (5x multiplier) with reduce_bbox_sizes=True
+    # This creates many smaller tiles that fit field geometry boundaries tightly
+    # 5x multiplier = ~20×24 theoretical tiles → ~120-150 active after reduce_bbox_sizes
+ # Avoids creating tiles that shrink to 0 pixels (as happened with 7x)
+ nx_fine = nx_base * 5
+ ny_fine = ny_base * 5
- print(f"\nGrid Calculation (extra fine grid with reduce_bbox_sizes=True):")
+ print(f"\nGrid Calculation (fine grid with reduce_bbox_sizes=True):")
print(f" Area extent: {width_m:.0f}m × {height_m:.0f}m")
print(f" Max bbox size: {max_size_m:.0f}m ({max_pixels}px @ {resolution}m)")
print(f" Base grid: {nx_base}×{ny_base} = {nx_base*ny_base} tiles")
- print(f" Extra fine grid (3x): {nx_fine}×{ny_fine} = {nx_fine*ny_fine} theoretical tiles")
+    print(f"  Fine grid (5x): {nx_fine}×{ny_fine} = {nx_fine*ny_fine} theoretical tiles")
# Convert geometries to Shapely for BBoxSplitter
shapely_geoms = [geom for geom in gdf.geometry]
@@ -262,9 +288,9 @@ def download_tile(
try:
size = bbox_to_dimensions(bbox, resolution=resolution)
- # Create download request with 5-band raw passthrough evalscript (uint16)
+ # Create download request with 4-band cloud-masked evalscript (uint16)
request = SentinelHubRequest(
- evalscript=EVALSCRIPT_5BAND_RAW,
+ evalscript=EVALSCRIPT_4BAND_MASKED,
input_data=[
SentinelHubRequest.input_data(
data_collection=byoc,
@@ -331,8 +357,8 @@ def download_date(
if download_tile(date_str, bbox, output_dir, config, byoc, resolution):
successful += 1
- # Delay to avoid rate limiting (0.1s between requests - can be aggressive with small tiles)
- time.sleep(0.05)
+ # Delay to avoid rate limiting (0.002s between requests - can be aggressive with small tiles)
+ time.sleep(0.002)
print(f"\n Result: {successful}/{len(bbox_list)} tiles downloaded")
return successful
@@ -376,7 +402,7 @@ def merge_tiles(date_str: str, base_path: Path) -> bool:
# Convert to compressed GeoTIFF
print(f" Converting to GeoTIFF...")
options = gdal.TranslateOptions(
- outputType=gdal.GDT_UInt16, # Keep as uint16 (raw DN values)
+ outputType=gdal.GDT_Float32,
creationOptions=[
'COMPRESS=LZW',
'TILED=YES',
diff --git a/python_app/download_angata_3years.bat b/python_app/download_angata_3years.bat
deleted file mode 100644
index d0546d4..0000000
--- a/python_app/download_angata_3years.bat
+++ /dev/null
@@ -1,24 +0,0 @@
-@echo off
-REM Download 3 years of Planet data for Angata (missing dates only)
-REM Adjust start/end dates as needed
-
-echo ============================================================
-echo PLANET SATELLITE DATA DOWNLOAD - 3 YEAR RANGE
-echo ============================================================
-
-REM Activate conda environment
-call conda activate pytorch_gpu
-
-REM Download from 2023-01-01 to 2025-12-31 (adjust dates as needed)
-REM The script will automatically skip dates that already exist
-python download_planet_missing_dates.py ^
- --project angata ^
- --start 2023-01-01 ^
- --end 2025-12-15 ^
- --resolution 3
-
-echo.
-echo ============================================================
-echo Download complete!
-echo ============================================================
-pause
diff --git a/python_app/planet_download_with_ocm copy.ipynb b/python_app/experiments/planet_download_with_ocm copy.ipynb
similarity index 100%
rename from python_app/planet_download_with_ocm copy.ipynb
rename to python_app/experiments/planet_download_with_ocm copy.ipynb
diff --git a/python_app/planet_download_with_ocm.ipynb b/python_app/experiments/planet_download_with_ocm.ipynb
similarity index 100%
rename from python_app/planet_download_with_ocm.ipynb
rename to python_app/experiments/planet_download_with_ocm.ipynb
diff --git a/python_scripts/generate_ci_graphs_dashboard.py b/python_app/python_scripts/generate_ci_graphs_dashboard.py
similarity index 100%
rename from python_scripts/generate_ci_graphs_dashboard.py
rename to python_app/python_scripts/generate_ci_graphs_dashboard.py
diff --git a/python_scripts/generate_interactive_ci_dashboard.py b/python_app/python_scripts/generate_interactive_ci_dashboard.py
similarity index 100%
rename from python_scripts/generate_interactive_ci_dashboard.py
rename to python_app/python_scripts/generate_interactive_ci_dashboard.py
diff --git a/python_app/01_planet_download.py b/python_app/python_scripts/old/01_planet_download.py
similarity index 100%
rename from python_app/01_planet_download.py
rename to python_app/python_scripts/old/01_planet_download.py
diff --git a/python_app/Chemba_download.ipynb b/python_app/python_scripts/old/Chemba_download.ipynb
similarity index 100%
rename from python_app/Chemba_download.ipynb
rename to python_app/python_scripts/old/Chemba_download.ipynb
diff --git a/python_app/Chemba_download_old.ipynb b/python_app/python_scripts/old/Chemba_download_old.ipynb
similarity index 100%
rename from python_app/Chemba_download_old.ipynb
rename to python_app/python_scripts/old/Chemba_download_old.ipynb
diff --git a/python_app/call_planet_download.py b/python_app/python_scripts/old/call_planet_download.py
similarity index 100%
rename from python_app/call_planet_download.py
rename to python_app/python_scripts/old/call_planet_download.py
diff --git a/python_app/planet_download.ipynb b/python_app/python_scripts/old/planet_download.ipynb
similarity index 100%
rename from python_app/planet_download.ipynb
rename to python_app/python_scripts/old/planet_download.ipynb
diff --git a/python_app/planet_download_8band.ipynb b/python_app/python_scripts/old/planet_download_8band.ipynb
similarity index 100%
rename from python_app/planet_download_8band.ipynb
rename to python_app/python_scripts/old/planet_download_8band.ipynb
diff --git a/python_app/planet_download_8band_optimized.ipynb b/python_app/python_scripts/old/planet_download_8band_optimized.ipynb
similarity index 100%
rename from python_app/planet_download_8band_optimized.ipynb
rename to python_app/python_scripts/old/planet_download_8band_optimized.ipynb
diff --git a/python_app/test_merge.py b/python_app/python_scripts/old/test_merge.py
similarity index 100%
rename from python_app/test_merge.py
rename to python_app/python_scripts/old/test_merge.py
diff --git a/r_app/01_create_master_grid_and_split_tiffs.R b/r_app/01_create_master_grid_and_split_tiffs.R
new file mode 100644
index 0000000..a59057d
--- /dev/null
+++ b/r_app/01_create_master_grid_and_split_tiffs.R
@@ -0,0 +1,271 @@
+#' Combined: Create master grid and split TIFFs into tiles
+#' ====================================================================
+#'
+#' Purpose:
+#' 1. Check all daily TIFFs for matching extents
+#' 2. Create master 5×5 grid covering all TIFFs
+#' 3. Split each daily TIFF into 25 tiles using the master grid
+#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
+
+library(terra)
+library(sf)
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+PROJECT <- "angata"
+TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
+OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split")
+
+# Grid dimensions will be auto-determined based on ROI size
+# Default: 5×5 = 25 tiles
+# If ROI < 10×10 km: 1×1 = 1 tile (no splitting needed)
+GRID_NROWS <- 5
+GRID_NCOLS <- 5
+
+cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n")
+
+# ============================================================================
+# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
+# ============================================================================
+
+cat("\n[PART 1] Creating Master Grid\n")
+
+cat("\n[1] Checking TIFF extents...\n")
+
+tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
+tiff_files <- sort(tiff_files)
+
+if (length(tiff_files) == 0) {
+ stop("No TIFF files found in ", TIFF_FOLDER)
+}
+
+cat(" Found", length(tiff_files), "TIFF file(s)\n")
+
+# Load all extents
+extents <- list()
+for (i in seq_along(tiff_files)) {
+ tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
+ raster <- terra::rast(tiff_path)
+ ext <- terra::ext(raster)
+ extents[[i]] <- ext
+}
+
+# Check if all extents match
+cat("\n[2] Comparing extents...\n")
+
+tolerance <- 1e-8
+all_match <- TRUE
+first_ext <- extents[[1]]
+
+for (i in 2:length(extents)) {
+ curr_ext <- extents[[i]]
+ match <- (
+ abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
+ abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
+ abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
+ abs(curr_ext$ymax - first_ext$ymax) < tolerance
+ )
+ if (!match) {
+ all_match <- FALSE
+ cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
+ cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
+ "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
+ cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
+ "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
+ }
+}
+
+if (all_match) {
+ cat(" ✓ All TIFF extents MATCH perfectly!\n")
+} else {
+ cat(" ⚠️ Extents differ - creating master extent covering all\n")
+}
+
+# Create master extent
+cat("\n[3] Creating master extent...\n")
+
+master_xmin <- min(sapply(extents, function(e) e$xmin))
+master_xmax <- max(sapply(extents, function(e) e$xmax))
+master_ymin <- min(sapply(extents, function(e) e$ymin))
+master_ymax <- max(sapply(extents, function(e) e$ymax))
+
+x_range_m <- (master_xmax - master_xmin) * 111320
+y_range_m <- (master_ymax - master_ymin) * 111320
+
+cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
+ "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
+cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")
+
+# Auto-determine grid size based on ROI dimensions
+if (x_range_m < 10000 && y_range_m < 10000) {
+ cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
+ GRID_NROWS <- 1
+ GRID_NCOLS <- 1
+} else {
+ cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n")
+ GRID_NROWS <- 5
+ GRID_NCOLS <- 5
+}
+
+N_TILES <- GRID_NROWS * GRID_NCOLS
+
+# Check if master grid already exists
+cat("\n[4] Checking if master grid exists...\n")
+
+master_grid_file <- file.path(OUTPUT_FOLDER, "master_grid_5x5.geojson")
+
+if (file.exists(master_grid_file)) {
+ cat(" ✓ Master grid exists! Loading existing grid...\n")
+ master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
+ master_grid_vect <- terra::vect(master_grid_file)
+ cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
+} else {
+ cat(" Grid does not exist. Creating new master grid...\n")
+
+ # Create 5×5 grid
+ cat("\n[5] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
+
+ master_bbox <- st_bbox(c(
+ xmin = master_xmin,
+ xmax = master_xmax,
+ ymin = master_ymin,
+ ymax = master_ymax
+ ), crs = 4326)
+
+ bbox_sf <- st_as_sfc(master_bbox)
+
+ master_grid <- st_make_grid(
+ bbox_sf,
+ n = c(GRID_NCOLS, GRID_NROWS),
+ what = "polygons"
+ )
+
+ master_grid_sf <- st_sf(
+ tile_id = sprintf("%02d", 1:length(master_grid)),
+ geometry = master_grid
+ )
+
+ cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")
+
+ # Convert to SpatVector for use in makeTiles
+ master_grid_vect <- terra::vect(master_grid_sf)
+
+ # Save master grid
+ if (!dir.exists(OUTPUT_FOLDER)) {
+ dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
+ }
+ st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
+ cat(" ✓ Master grid saved to: master_grid_5x5.geojson\n")
+}
+
+# ============================================================================
+# PART 2: SPLIT EACH TIFF INTO TILES
+# ============================================================================
+
+cat("\n[PART 2] Splitting TIFFs into Tiles\n")
+
+cat("\n[6] Splitting TIFFs using master grid...\n")
+
+total_tiles_created <- 0
+
+for (file_idx in seq_along(tiff_files)) {
+ tiff_file <- tiff_files[file_idx]
+ date_str <- gsub("\\.tif$", "", tiff_file)
+
+ cat("\n Processing: ", tiff_file, "\n", sep = "")
+
+ # Load TIFF
+ tiff_path <- file.path(TIFF_FOLDER, tiff_file)
+ raster <- terra::rast(tiff_path)
+
+ dims <- dim(raster)
+ cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
+
+ # Create date-specific output folder
+ date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
+ if (!dir.exists(date_output_folder)) {
+ dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
+ }
+
+ cat(" Splitting into ", N_TILES, " tiles using master grid...\n", sep = "")
+
+ # Split using master grid zones
+ tiles_list <- terra::makeTiles(
+ x = raster,
+ y = master_grid_vect,
+ filename = file.path(date_output_folder, "tile.tif"),
+ overwrite = TRUE
+ )
+
+ cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
+
+ # Rename tiles to [DATE]_[TILE_ID].tif
+ cat(" Renaming tiles...\n")
+
+ for (tile_idx in seq_along(tiles_list)) {
+ source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
+ tile_id <- sprintf("%02d", tile_idx)
+ final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
+
+ if (file.exists(source_file)) {
+ file.rename(source_file, final_file)
+ }
+ }
+
+ cat(" ✓ Saved ", N_TILES, " tiles in folder: ", date_str, "/\n", sep = "")
+ total_tiles_created <- total_tiles_created + length(tiles_list)
+}
+
+# ============================================================================
+# VERIFICATION
+# ============================================================================
+
+cat("\n[7] Verifying output...\n")
+
+# Count tiles per date folder
+date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
+date_folders <- sort(date_folders[date_folders != "."])
+
+total_tile_files <- 0
+for (date_folder in date_folders) {
+ tiles_in_folder <- list.files(file.path(OUTPUT_FOLDER, date_folder),
+ pattern = "\\.tif$")
+ tiles_in_folder <- tiles_in_folder[!grepl("master_grid", tiles_in_folder)]
+ total_tile_files <- total_tile_files + length(tiles_in_folder)
+ cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "")
+}
+
+# ============================================================================
+# SUMMARY
+# ============================================================================
+
+cat("SUMMARY\n")
+
+cat("\nMaster Grid:\n")
+cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, "=", N_TILES, " tiles\n", sep = "")
+cat(" - File: master_grid_5x5.geojson\n")
+
+cat("\nTIFF Splitting:\n")
+cat(" - TIFFs processed: ", length(tiff_files), "\n", sep = "")
+cat(" - Total tile files created: ", total_tile_files, "\n", sep = "")
+cat(" - Expected total: ", length(tiff_files) * N_TILES, "\n", sep = "")
+
+cat("\nDirectory Structure:\n")
+cat(" daily_tiles/\n")
+cat(" ├── master_grid_5x5.geojson\n")
+for (date_folder in date_folders) {
+ cat(" ├── ", date_folder, "/\n", sep = "")
+ cat(" │ ├── ", date_folder, "_01.tif\n", sep = "")
+ cat(" │ ├── ", date_folder, "_02.tif\n", sep = "")
+ cat(" │ └── ...\n")
+}
+
+cat("\nKey Properties:\n")
+cat(" - All tiles align perfectly across dates\n")
+cat(" - Tile 01 covers same area in all dates\n")
+cat(" - Date folders provide organizational hierarchy\n")
+cat(" - Can do time-series analysis per tile\n")
+
+cat("✓ Script complete!\n")
diff --git a/r_app/02_ci_extraction.R b/r_app/02_ci_extraction.R
index d5dd572..b430c02 100644
--- a/r_app/02_ci_extraction.R
+++ b/r_app/02_ci_extraction.R
@@ -9,13 +9,17 @@
# - offset: Number of days to look back from end_date
# - project_dir: Project directory name (e.g., "angata", "aura", "chemba")
# - data_source: Data source directory - "merged_tif_8b" (default) or "merged_tif" (4-band) or "merged_final_tif"
+# If tiles exist (daily_tiles_split/), they are used automatically
#
# Examples:
# # Angata 8-band data (with UDM cloud masking)
-# Rscript 02_ci_extraction.R 2025-12-02 7 angata merged_tif_8b
+# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/02_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
#
# # Aura 4-band data
# Rscript 02_ci_extraction.R 2025-11-26 7 aura merged_tif
+#
+# # Auto-detects and uses tiles if available:
+# Rscript 02_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
# 1. Load required packages
# -----------------------
@@ -24,9 +28,9 @@ suppressPackageStartupMessages({
library(terra)
library(tidyverse)
library(lubridate)
- library(exactextractr)
library(readxl)
library(here)
+ library(furrr)
})
# 2. Process command line arguments
@@ -65,7 +69,7 @@ main <- function() {
} else if (exists("project_dir", envir = .GlobalEnv)) {
project_dir <- get("project_dir", envir = .GlobalEnv)
} else {
- project_dir <- "aura" # Changed default from "aura" to "esa"
+    project_dir <- "angata" # Default project when none is supplied via args or .GlobalEnv
}
# Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif)
@@ -116,28 +120,49 @@ main <- function() {
# 4. Generate date list for processing
# ---------------------------------
- dates <- date_list(end_date, offset)
+ dates <- date_list(end_date, 7)
log_message(paste("Processing data for week", dates$week, "of", dates$year))
# 5. Find and filter raster files by date
# -----------------------------------
log_message("Searching for raster files")
+ # Check if tiles exist (Script 01 output)
+ tile_folder <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")
+ use_tiles <- dir.exists(tile_folder)
+
tryCatch({
- # Use the new utility function to find satellite images
- existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
- log_message(paste("Found", length(existing_files), "raster files for processing"))
-
- # 6. Process raster files and create VRT
- # -----------------------------------
- # Use the new utility function for batch processing
- vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
-
- # 7. Process and combine CI values
- # ------------------------------
- # Call the process_ci_values function from utils with all required parameters
- process_ci_values(dates, field_boundaries, merged_final,
- field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
+ if (use_tiles) {
+ # Use tile-based processing
+ log_message(paste("Tile folder detected at", tile_folder))
+ log_message("Using tile-based CI extraction")
+
+ # Call the tile-based extraction function
+ process_ci_values_from_tiles(
+ dates = dates,
+ tile_folder = tile_folder,
+ field_boundaries = field_boundaries,
+ field_boundaries_sf = field_boundaries_sf,
+ daily_CI_vals_dir = daily_CI_vals_dir,
+ cumulative_CI_vals_dir = cumulative_CI_vals_dir,
+ merged_final_dir = merged_final
+ )
+
+ } else {
+ # Use legacy full-extent processing
+ log_message("No tiles found. Using legacy full-extent approach")
+
+ # Use the existing utility function to find satellite images
+ existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
+ log_message(paste("Found", length(existing_files), "raster files for processing"))
+
+ # Process raster files and create VRT
+ vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
+
+ # Process and combine CI values
+ process_ci_values(dates, field_boundaries, merged_final,
+ field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
+ }
}, error = function(e) {
log_message(paste("Error in main processing:", e$message), level = "ERROR")
diff --git a/r_app/04_mosaic_creation.R b/r_app/04_mosaic_creation.R
index 04c20b9..6a591cc 100644
--- a/r_app/04_mosaic_creation.R
+++ b/r_app/04_mosaic_creation.R
@@ -5,11 +5,17 @@
# This script creates weekly mosaics from daily satellite imagery.
# It handles command-line arguments and initiates the mosaic creation process.
#
-# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name]
+# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size]
# - end_date: End date for processing (YYYY-MM-DD format)
# - offset: Number of days to look back from end_date
# - project_dir: Project directory name (e.g., "chemba")
# - file_name: Optional custom output file name
+# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE)
+# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE)
+#
+# Examples:
+# Rscript 04_mosaic_creation.R 2025-12-21 7 angata
+# Rscript 04_mosaic_creation.R 2025-12-21 7 angata week_51.tif TRUE 5 [tile-based]
#
# 1. Load required packages
@@ -75,16 +81,16 @@ main <- function() {
# 3. Initialize project configuration
# --------------------------------
- # Detect which data source directory exists (merged_virtual_8b or merged_tif_8b)
+ # Detect which data source directory exists (merged_tif or merged_tif_8b)
laravel_storage <- here::here("laravel_app/storage/app", project_dir)
- data_source <- if (dir.exists(file.path(laravel_storage, "merged_virtual_8b"))) {
- message("Detected data source: merged_virtual_8b")
- "merged_virtual_8b"
- } else if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) {
- message("Detected data source: merged_tif_8b")
+ data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) {
+ message("Detected data source: merged_tif_8b (8-band optimized)")
"merged_tif_8b"
+ } else if (dir.exists(file.path(laravel_storage, "merged_tif"))) {
+ message("Detected data source: merged_tif (legacy 4-band)")
+ "merged_tif"
} else {
- message("Using default data source: merged_tif_8b")
+ message("Warning: No data source found. Using default: merged_tif_8b")
"merged_tif_8b"
}
@@ -121,22 +127,26 @@ main <- function() {
safe_log(paste("Output will be saved as:", file_name_tif))
- # 5. Create weekly mosaic using the function from utils
- # -------------------------------------------------
+ # 5. Create weekly per-tile MAX mosaics
+ # ----------------------------------
+
tryCatch({
- safe_log("Starting mosaic creation...")
- result <- create_weekly_mosaic(
+ safe_log("Starting per-tile mosaic creation...")
+
+ # Set output directory for per-tile mosaics
+ tile_output_base <- file.path(laravel_storage, "weekly_tile_max")
+
+ created_tile_files <- create_weekly_mosaic_from_tiles(
dates = dates,
- field_boundaries = field_boundaries,
- daily_vrt_dir = daily_vrt,
merged_final_dir = merged_final,
- output_dir = weekly_CI_mosaic,
- file_name_tif = file_name_tif,
- create_plots = TRUE
+ tile_output_dir = tile_output_base,
+ field_boundaries = field_boundaries
)
- safe_log(paste("Mosaic creation completed successfully:", result))
+
+ safe_log(paste("✓ Per-tile mosaic creation completed - created",
+ length(created_tile_files), "tile files"))
}, error = function(e) {
- safe_log(paste("ERROR in create_weekly_mosaic:", e$message), "WARNING")
+ safe_log(paste("ERROR in mosaic creation:", e$message), "WARNING")
traceback()
stop("Mosaic creation failed")
})
diff --git a/r_app/09_field_analysis_weekly.R b/r_app/09_field_analysis_weekly.R
index 5585ac8..49a9fd0 100644
--- a/r_app/09_field_analysis_weekly.R
+++ b/r_app/09_field_analysis_weekly.R
@@ -377,11 +377,15 @@ calculate_cloud_coverage_from_mosaic <- function(mosaic_raster, field_boundaries
field_id <- field_boundaries_sf$field[i]
sub_field <- field_boundaries_sf$sub_field[i]
- # Extract pixel values from field using exactextractr (avoids crop/mask boundary artifacts)
+ # Extract pixel values from field using terra::extract (memory-efficient, native terra)
extracted_vals <- tryCatch({
- result <- exactextractr::exact_extract(ci_band, field_boundaries_sf[i, ], progress = FALSE)
- # exact_extract returns a list; get the first (only) element which is the data.frame
- if (is.list(result)) result[[1]] else result
+ result <- terra::extract(ci_band, field_boundaries_sf[i, ], cells = TRUE, na.rm = FALSE)
+ # terra::extract returns a data.frame with ID and value columns; extract values only
+ if (is.data.frame(result) && nrow(result) > 0) {
+ data.frame(value = result$value, cells = result$cell)
+ } else {
+ NULL
+ }
}, error = function(e) {
NULL
})
diff --git a/r_app/09b_field_analysis_weekly.R b/r_app/09b_field_analysis_weekly.R
new file mode 100644
index 0000000..6bbb7c3
--- /dev/null
+++ b/r_app/09b_field_analysis_weekly.R
@@ -0,0 +1,942 @@
+# 09b_FIELD_ANALYSIS_WEEKLY.R (NEW - TILE-AWARE WITH PARALLEL PROCESSING)
+# ============================================================================
+# Per-field weekly analysis with tile-based mosaic extraction and parallel processing
+#
+# MAJOR IMPROVEMENTS OVER 09_field_analysis_weekly.R:
+# - Tile-aware: Loads only relevant tiles for each field (memory efficient)
+# - Parallel processing: Uses furrr for parallel field extraction (1300+ fields supported)
+# - Field-based cloud analysis: Cloud coverage calculated per-field from tiles
+# - Scalable: Architecture ready for 13,000+ fields
+#
+# Generates detailed field-level CSV export with:
+# - Field identifiers and areas
+# - Weekly CI change (mean ± std) from tile-based extraction
+# - Age-based phase assignment (Germination, Tillering, Grand Growth, Maturation)
+# - Harvest imminence detection (Phase 1 from LSTM model) - optional
+# - Status triggers (non-exclusive, can coexist with harvest imminent phase)
+# - Phase transition tracking (weeks in current phase)
+# - Cloud coverage analysis from tiles (per-field, not mosaic-wide)
+#
+# Cloud Coverage Per-Field:
+# - Extracts from relevant tiles that field intersects
+# - Categories: Clear view (>=99.5% clear pixels), Partial coverage (>0% and <99.5%), No image available (0%)
+# - Reports % of fields by cloud category
+#
+# Parallel Processing:
+# - Uses furrr::future_map_df() for CPU-parallel field extraction
+# - Configure workers before running: future::plan(future::multisession, workers = N)
+# - Each worker: loads tile(s), extracts CI, calculates stats
+# - Significant speedup for 1000+ fields
+#
+# Output:
+# - Excel (.xlsx) with Field Data sheet and Summary sheet
+# - Excel (.xlsx) weekly harvest predictions for tracking
+# - RDS file with field_analysis and field_analysis_summary for Rmd reports
+# - Summary includes: Monitored area, Cloud coverage, Phase distribution, Status triggers
+#
+# Usage: Rscript 09b_field_analysis_weekly.R [end_date] [project_dir]
+# - end_date: End date for analysis (YYYY-MM-DD format), default: today
+# - project_dir: Project directory name (e.g., "aura", "esa", "angata")
+#
+# Example:
+# Rscript 09b_field_analysis_weekly.R 2026-01-08 angata
+
+# 1. Load required libraries
+suppressPackageStartupMessages({
+ library(here)
+ library(sf)
+ library(terra)
+ library(dplyr)
+ library(tidyr)
+ library(lubridate)
+ library(readr)
+ library(readxl)
+ library(writexl)
+ library(purrr)
+ library(furrr)
+ library(future)
+ # Optional: torch for harvest model inference (will skip if not available)
+ tryCatch({
+ library(torch)
+ }, error = function(e) {
+ message("Note: torch package not available - harvest model inference will be skipped")
+ })
+})
+
+# ============================================================================
+# PHASE AND STATUS TRIGGER DEFINITIONS
+# ============================================================================
+
+# Growth phases by crop age in weeks. Note the ranges overlap at the
+# boundaries (weeks 4-6 appear in both Germination and Tillering, week 39
+# in both Grand Growth and Maturation); consumers scanning in row order
+# resolve the overlap by taking the first matching row.
+PHASE_DEFINITIONS <- data.frame(
+  phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
+  age_start = c(0, 4, 17, 39),
+  age_end = c(6, 16, 39, 200),
+  stringsAsFactors = FALSE
+)
+
+# Catalogue of status triggers with the age windows (weeks) in which each
+# can fire; NA bounds mean the trigger is not age-restricted. `description`
+# is human-readable documentation only - the actual firing rules live in
+# get_status_trigger().
+STATUS_TRIGGERS <- data.frame(
+  trigger = c(
+    "germination_started",
+    "germination_complete",
+    "stress_detected_whole_field",
+    "strong_recovery",
+    "growth_on_track",
+    "maturation_progressing",
+    "harvest_ready"
+  ),
+  age_min = c(0, 0, NA, NA, 4, 39, 45),
+  age_max = c(6, 6, NA, NA, 39, 200, 200),
+  description = c(
+    "10% of field CI > 2",
+    "70% of field CI >= 2",
+    "CI decline > -1.5 + low CV",
+    "CI increase > +1.5",
+    "CI increasing consistently",
+    "High CI, stable/declining",
+    "Age 45+ weeks (ready to harvest)"
+  ),
+  stringsAsFactors = FALSE
+)
+
+# ============================================================================
+# TILE-AWARE HELPER FUNCTIONS
+# ============================================================================
+
+#' Find the tile IDs whose extents overlap a field's bounding box
+#'
+#' The test is bbox-vs-bbox only (no exact geometry intersection), which is
+#' sufficient for deciding which tiles must be loaded for extraction.
+#'
+#' @param field_geom Single field geometry (sf or terra::vect)
+#' @param tile_grid Data frame with tile extents (id, xmin, xmax, ymin, ymax)
+#' @return Numeric vector of tile IDs that the field's bbox overlaps
+#'
+get_tile_ids_for_field <- function(field_geom, tile_grid) {
+  # Reduce the field geometry to an axis-aligned bounding box
+  if (inherits(field_geom, "sf")) {
+    bb <- sf::st_bbox(field_geom)
+    fx0 <- bb["xmin"]; fx1 <- bb["xmax"]
+    fy0 <- bb["ymin"]; fy1 <- bb["ymax"]
+  } else if (inherits(field_geom, "SpatVector")) {
+    bb <- terra::ext(field_geom)
+    fx0 <- bb$xmin; fx1 <- bb$xmax
+    fy0 <- bb$ymin; fy1 <- bb$ymax
+  } else {
+    stop("field_geom must be sf or terra::vect object")
+  }
+
+  # A tile overlaps unless it lies entirely to one side of the field bbox
+  # (equivalent, by De Morgan, to the separating-axis test on each edge)
+  overlaps <- tile_grid$xmax >= fx0 &
+    tile_grid$xmin <= fx1 &
+    tile_grid$ymax >= fy0 &
+    tile_grid$ymin <= fy1
+
+  as.numeric(tile_grid$id[overlaps])
+}
+
+#' Load CI tiles that a field intersects
+#'
+#' Loads each requested weekly tile, isolates its CI layer, and merges
+#' multiple tiles into one raster (max of overlapping cells). Tiles that
+#' are missing on disk or fail to load are skipped with a warning.
+#'
+#' @param field_geom Single field geometry. NOTE(review): currently unused
+#'   in the body - kept for interface stability; candidate for cropping the
+#'   merged raster to the field in a future change.
+#' @param tile_ids Numeric vector of tile IDs to load
+#' @param week_num Week number
+#' @param year Year
+#' @param mosaic_dir Directory with weekly tiles
+#' @return Single CI raster (merged if multiple tiles, or single tile),
+#'   or NULL when no tile could be loaded
+#'
+load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
+  # No candidate tiles: nothing to load
+  if (length(tile_ids) == 0) {
+    return(NULL)
+  }
+
+  # Load relevant tiles
+  tiles_list <- list()
+  for (tile_id in sort(tile_ids)) {
+    # Tile file naming convention: week_WW_YYYY_TT.tif
+    tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
+    tile_path <- file.path(mosaic_dir, tile_filename)
+
+    if (file.exists(tile_path)) {
+      tryCatch({
+        tile_rast <- terra::rast(tile_path)
+        # Extract CI band: prefer a layer named "CI", then band 5,
+        # then fall back to the first band for single-band tiles
+        if ("CI" %in% names(tile_rast)) {
+          ci_band <- tile_rast[["CI"]]
+        } else if (terra::nlyr(tile_rast) >= 5) {
+          ci_band <- tile_rast[[5]]
+        } else {
+          ci_band <- tile_rast[[1]]
+        }
+        names(ci_band) <- "CI"
+        tiles_list[[length(tiles_list) + 1]] <- ci_band
+      }, error = function(e) {
+        message(paste(" Warning: Could not load tile", tile_id, ":", e$message))
+      })
+    }
+  }
+
+  # All candidates missing or unreadable
+  if (length(tiles_list) == 0) {
+    return(NULL)
+  }
+
+  # If multiple tiles, merge them; otherwise return single tile
+  if (length(tiles_list) == 1) {
+    return(tiles_list[[1]])
+  } else {
+    tryCatch({
+      # Mosaic with max so overlapping tile edges keep the strongest signal
+      rsrc <- terra::sprc(tiles_list)
+      merged <- terra::mosaic(rsrc, fun = "max")
+      return(merged)
+    }, error = function(e) {
+      # NOTE(review): falling back to the first tile silently drops the
+      # coverage of the other tiles - acceptable best-effort, but verify
+      message(paste(" Warning: Could not merge tiles:", e$message))
+      return(tiles_list[[1]]) # Fallback to first tile
+    })
+  }
+}
+
+#' Build tile grid from available weekly tile files
+#'
+#' Discovers the per-tile weekly mosaics for one week/year and records each
+#' tile's spatial extent so fields can later be matched to tiles by bbox.
+#'
+#' @param mosaic_dir Directory with weekly tiles
+#' @param week_num Week number to discover tiles
+#' @param year Year to discover tiles
+#' @return Data frame with columns: id, xmin, xmax, ymin, ymax
+#'
+build_tile_grid <- function(mosaic_dir, week_num, year) {
+  # Anchored pattern (^...$) so sidecar files such as
+  # "week_01_2025_05.tif.ovr" or "xweek_..." are excluded
+  tile_pattern <- sprintf("^week_%02d_%d_([0-9]{2})\\.tif$", week_num, year)
+  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
+
+  if (length(tile_files) == 0) {
+    stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
+  }
+
+  # Accumulate one row per tile in a pre-sized list, then bind once at the
+  # end (avoids the O(n^2) cost of growing a data.frame with rbind in a loop)
+  rows <- vector("list", length(tile_files))
+  for (i in seq_along(tile_files)) {
+    tile_file <- tile_files[i]
+    tryCatch({
+      # Tile ID is the trailing two-digit suffix of the file name
+      matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
+      if (length(matches) > 0) {
+        tile_id <- as.integer(gsub("[^0-9]", "", matches))
+
+        # Open the raster only to read its extent
+        tile_rast <- terra::rast(tile_file)
+        ext <- terra::ext(tile_rast)
+
+        rows[[i]] <- data.frame(
+          id = tile_id,
+          xmin = ext$xmin,
+          xmax = ext$xmax,
+          ymin = ext$ymin,
+          ymax = ext$ymax,
+          stringsAsFactors = FALSE
+        )
+      }
+    }, error = function(e) {
+      message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
+    })
+  }
+
+  # Drop skipped/failed entries and bind the rest in one pass
+  rows <- rows[!vapply(rows, is.null, logical(1))]
+  if (length(rows) == 0) {
+    stop("Could not extract extents from any tile files")
+  }
+  tile_grid <- do.call(rbind, rows)
+
+  return(tile_grid)
+}
+
+# ============================================================================
+# HELPER FUNCTIONS (FROM ORIGINAL 09)
+# ============================================================================
+
+#' Map a crop age (weeks) to its growth phase via PHASE_DEFINITIONS
+#'
+#' Returns NA for a missing age and "Unknown" when no phase range matches.
+#' Boundary ages contained in several ranges resolve to the first row of
+#' PHASE_DEFINITIONS (same first-match semantics as a sequential scan).
+get_phase_by_age <- function(age_weeks) {
+  if (is.na(age_weeks)) return(NA_character_)
+  hits <- which(age_weeks >= PHASE_DEFINITIONS$age_start &
+                  age_weeks <= PHASE_DEFINITIONS$age_end)
+  if (length(hits) == 0) return("Unknown")
+  PHASE_DEFINITIONS$phase[hits[1]]
+}
+
+#' Derive a single status trigger for a field from its CI distribution
+#'
+#' Rules are checked in a fixed order and the FIRST match wins, so e.g. a
+#' stressed 50-week field reports "harvest_ready" rather than stress.
+#'
+#' @param ci_values Numeric vector of per-pixel CI values for the field
+#' @param ci_change Week-over-week change of the field's mean CI (may be NA)
+#' @param age_weeks Crop age in weeks (may be NA)
+#' @return One trigger name (see STATUS_TRIGGERS), or NA_character_
+get_status_trigger <- function(ci_values, ci_change, age_weeks) {
+  # No age or no pixels: nothing can be concluded
+  if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)
+
+  ci_values <- ci_values[!is.na(ci_values)]
+  if (length(ci_values) == 0) return(NA_character_)
+
+  # Distribution statistics shared by the rules below
+  pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
+  pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
+  # Coefficient of variation, guarded so a zero/negative mean yields 0
+  ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
+  mean_ci <- mean(ci_values, na.rm = TRUE)
+
+  # Germination phase triggers (age 0-6); complete takes priority over started
+  if (age_weeks >= 0 && age_weeks <= 6) {
+    if (pct_at_or_above_2 >= 70) {
+      return("germination_complete")
+    } else if (pct_above_2 > 10) {
+      return("germination_started")
+    }
+  }
+
+  # Harvest ready (45+ weeks) - checked before the change-based rules below
+  # (germination ages cannot reach 45, so ordering vs. germination is moot)
+  if (age_weeks >= 45) {
+    return("harvest_ready")
+  }
+
+  # Stress detection (any phase except Germination): sharp, uniform decline
+  if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
+    return("stress_detected_whole_field")
+  }
+
+  # Strong recovery (any phase except Germination)
+  if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
+    return("strong_recovery")
+  }
+
+  # Growth on track (Tillering/Grand Growth, 4-39 weeks)
+  if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
+    return("growth_on_track")
+  }
+
+  # Maturation progressing (39-45 weeks, high CI stable/declining)
+  if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
+    return("maturation_progressing")
+  }
+
+  return(NA_character_)
+}
+
+#' Locate and read the most recent prior week's field-analysis CSV
+#'
+#' Looks back up to three weeks (wrapping below week 1 by adding 52) and
+#' returns the first CSV that exists and loads cleanly.
+#'
+#' @return list(data = tibble or NULL, weeks_lookback = 1..3 or NA, found = logical)
+load_previous_week_csv <- function(project_dir, current_week, reports_dir) {
+  for (lookback in 1:3) {
+    wk <- current_week - lookback
+    if (wk < 1) wk <- wk + 52
+
+    csv_path <- file.path(
+      reports_dir, "kpis", "field_analysis",
+      paste0(project_dir, "_field_analysis_week", sprintf("%02d", wk), ".csv")
+    )
+
+    if (file.exists(csv_path)) {
+      # A corrupt file is reported and the search continues at the next week
+      loaded <- tryCatch(
+        read_csv(csv_path, show_col_types = FALSE),
+        error = function(e) {
+          message(paste("Warning: Could not load", basename(csv_path), ":", e$message))
+          NULL
+        }
+      )
+      if (!is.null(loaded)) {
+        return(list(data = loaded, weeks_lookback = lookback, found = TRUE))
+      }
+    }
+  }
+
+  message("No previous field analysis CSV found. Phase tracking will be age-based only.")
+  return(list(data = NULL, weeks_lookback = NA, found = FALSE))
+}
+
+# Feature flag: when TRUE, every field is aged from UNIFORM_PLANTING_DATE
+# instead of per-field planting dates derived from harvesting data.
+USE_UNIFORM_AGE <- TRUE
+# NOTE(review): hard-coded season start - update per season or move to config
+UNIFORM_PLANTING_DATE <- as.Date("2025-01-01")
+
+#' Derive per-field planting dates from harvesting records
+#'
+#' In uniform-age mode (USE_UNIFORM_AGE) returns an EMPTY lookup so that
+#' callers find no per-field match and fall through to UNIFORM_PLANTING_DATE.
+#' Otherwise keeps the most recent season_start per field.
+#'
+#' @param harvesting_data Data frame with at least `field` and `season_start`
+#' @return data.frame(field_id, planting_date); zero rows in uniform mode;
+#'   NULL when data is missing or extraction fails
+extract_planting_dates <- function(harvesting_data) {
+  if (USE_UNIFORM_AGE) {
+    message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
+    # Zero-row frame: downstream lookups match nothing and use the uniform date
+    return(data.frame(
+      field_id = character(),
+      planting_date = as.Date(character()),
+      stringsAsFactors = FALSE
+    ))
+  }
+
+  if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
+    message("Warning: No harvesting data available.")
+    return(NULL)
+  }
+
+  tryCatch({
+    # Most recent season per field: sort season_start descending, keep first
+    planting_dates <- harvesting_data %>%
+      arrange(field, desc(season_start)) %>%
+      distinct(field, .keep_all = TRUE) %>%
+      select(field, season_start) %>%
+      rename(field_id = field, planting_date = season_start) %>%
+      filter(!is.na(planting_date)) %>%
+      as.data.frame()
+
+    message(paste("Extracted planting dates for", nrow(planting_dates), "fields (most recent season)"))
+    return(planting_dates)
+  }, error = function(e) {
+    message(paste("Error extracting planting dates:", e$message))
+    return(NULL)
+  })
+}
+
+# ============================================================================
+# NOTE: Cloud coverage is now calculated inline in analyze_single_field()
+# ============================================================================
+# Cloud coverage logic (per-field, from same CI extraction):
+# - Extract ALL pixels from field polygon (including NAs from clouds/missing data)
+# - Count: num_data = non-NA pixels, num_total = total pixels in field
+# - Calculate: pct_clear = (num_data / num_total) * 100
+# - Categorize: >=99.5% = "Clear view", >0% = "Partial coverage", 0% = "No image available"
+#
+# This ensures LOGICAL CONSISTENCY:
+# - If CI_mean has value → at least 1 pixel has data → pct_clear > 0 ✓
+# - If pct_clear = 0 → no data → CI_mean = NA ✓
+# - Eliminates double-extraction inefficiency
+
+# ============================================================================
+# PARALLEL FIELD ANALYSIS FUNCTION
+# ============================================================================
+
+#' Analyze single field (for parallel processing)
+#'
+#' Processes ONE field and is designed to run in parallel (one call per
+#' field from furrr::future_map): determines the tiles the field intersects,
+#' loads the current week's CI tiles, extracts per-pixel CI values, and
+#' derives statistics, cloud coverage, age-based phase and status triggers.
+#'
+#' @param field_idx Index in field_boundaries_sf
+#' @param field_boundaries_sf All field boundaries (sf object)
+#' @param tile_grid Data frame with tile extents (id, xmin, xmax, ymin, ymax)
+#' @param week_num Current week number
+#' @param year Current year
+#' @param mosaic_dir Directory with weekly tiles
+#' @param previous_week_csv Previous week's CSV data (phase tracking), or NULL
+#' @param planting_dates Planting dates lookup (field_id, planting_date), or NULL
+#' @param report_date Report date used for age calculation
+#' @param harvest_imminence_data Harvest imminence predictions (optional)
+#'
+#' @return Single-row data frame with field analysis. Error paths return a
+#'   short row carrying an `error` column; dplyr::bind_rows in the caller
+#'   fills the missing columns with NA.
+#'
+analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year,
+                                 mosaic_dir, previous_week_csv = NULL, planting_dates = NULL,
+                                 report_date = Sys.Date(), harvest_imminence_data = NULL) {
+
+  tryCatch({
+    # Get field info
+    field_id <- field_boundaries_sf$field[field_idx]
+    farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) {
+      field_boundaries_sf$sub_area[field_idx]
+    } else {
+      NA_character_
+    }
+
+    # Get field geometry and validate
+    field_sf <- field_boundaries_sf[field_idx, ]
+    if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
+      return(data.frame(
+        Field_id = field_id,
+        Farm_section = farm_section,
+        CI_mean = NA_real_,
+        error = "Empty or invalid geometry"
+      ))
+    }
+
+    # Field area: st_area -> m^2, /10000 -> hectares, /0.404686 -> acres
+    field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
+    field_area_acres <- field_area_ha / 0.404686
+
+    # Determine which tiles this field intersects
+    tile_ids <- get_tile_ids_for_field(field_sf, tile_grid)
+
+    # Load current CI tiles for this field
+    current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir)
+
+    if (is.null(current_ci)) {
+      return(data.frame(
+        Field_id = field_id,
+        Farm_section = farm_section,
+        Hectares = field_area_ha,
+        Acreage = field_area_acres,
+        CI_mean = NA_real_,
+        error = "No tile data available"
+      ))
+    }
+
+    # Extract CI values (keep ALL pixels including NAs for cloud calculation).
+    # terra::vect() converts sf directly - no need to round-trip through the
+    # sp package via sf::as_Spatial().
+    field_vect <- terra::vect(field_sf)
+    # NOTE(review): this overrides the CRS label without reprojecting;
+    # assumes boundaries already share the tile CRS - confirm upstream.
+    terra::crs(field_vect) <- terra::crs(current_ci)
+
+    all_extracted <- terra::extract(current_ci, field_vect)[, 2] # ALL pixels (including NAs)
+    current_ci_vals <- all_extracted[!is.na(all_extracted)]      # Only non-NA for CI analysis
+
+    # Cloud coverage from the SAME extraction (no double-extraction):
+    # pct_clear = share of in-field pixels that carry data.
+    num_total <- length(all_extracted)
+    num_data <- sum(!is.na(all_extracted))
+    # BUGFIX: with zero pixels report 0% clear (was 100, which contradicted
+    # the "No image available" category assigned below).
+    pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
+
+    # Categorize cloud coverage - check for no data first
+    cloud_cat <- if (num_data == 0) "No image available" # No data at all (100% cloud)
+    else if (pct_clear >= 99.5) "Clear view"             # 99.5%+ data
+    else "Partial coverage"                              # Some data but with gaps
+    cloud_pct <- pct_clear
+
+    # If no CI values extracted, return early with cloud info
+    if (length(current_ci_vals) == 0) {
+      return(data.frame(
+        Field_id = field_id,
+        Farm_section = farm_section,
+        Hectares = field_area_ha,
+        Acreage = field_area_acres,
+        CI_mean = NA_real_,
+        Cloud_pct = cloud_pct,
+        Cloud_category = cloud_cat,
+        error = "No CI values extracted"
+      ))
+    }
+
+    # Current CI statistics
+    mean_ci_current <- mean(current_ci_vals, na.rm = TRUE)
+    ci_std <- sd(current_ci_vals, na.rm = TRUE)
+    cv_current <- ci_std / mean_ci_current
+    range_min <- min(current_ci_vals, na.rm = TRUE)
+    range_max <- max(current_ci_vals, na.rm = TRUE)
+    range_str <- sprintf("%.1f-%.1f", range_min, range_max)
+
+    # Weekly CI change (compare with previous week if tiles exist)
+    weekly_ci_change <- NA
+    previous_ci_vals <- NULL
+
+    tryCatch({
+      previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir)
+      if (!is.null(previous_ci)) {
+        previous_ci_vals <- terra::extract(previous_ci, field_vect)[, 2]
+        previous_ci_vals <- previous_ci_vals[!is.na(previous_ci_vals)]
+        if (length(previous_ci_vals) > 0) {
+          mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
+          weekly_ci_change <- mean_ci_current - mean_ci_previous
+        }
+      }
+    }, error = function(e) {
+      # Silent fail - previous week data not available is acceptable
+    })
+
+    # Format CI change: "mean ± std" with the delta appended when known
+    if (is.na(weekly_ci_change)) {
+      weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std)
+    } else {
+      weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ %.2f)", mean_ci_current, ci_std, weekly_ci_change)
+    }
+
+    # Age from per-field planting date when available
+    age_weeks <- NA
+    if (!is.null(planting_dates) && nrow(planting_dates) > 0) {
+      planting_row <- which(planting_dates$field_id == field_id)
+      if (length(planting_row) > 0) {
+        planting_date <- planting_dates$planting_date[planting_row[1]]
+        if (!is.na(planting_date)) {
+          age_weeks <- as.numeric(difftime(report_date, planting_date, units = "weeks"))
+        }
+      }
+    }
+
+    # Uniform-age mode overrides any per-field planting date
+    if (USE_UNIFORM_AGE) {
+      age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
+    }
+
+    # Germination progress (only reported during the germination window)
+    pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100
+    pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100
+    germination_progress_str <- NA_character_
+    if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) {
+      germination_progress_str <- sprintf("%.0f%% at CI >= 2", pct_ci_ge_2)
+    }
+
+    # Phase: "Harvest Imminent" (model probability > 0.5) wins over age-based
+    phase <- "Unknown"
+    imminent_prob_val <- NA
+    if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
+      imminent_row <- which(harvest_imminence_data$field_id == field_id)
+      if (length(imminent_row) > 0) {
+        imminent_prob_val <- harvest_imminence_data$imminent_prob[imminent_row[1]]
+        if (!is.na(imminent_prob_val) && imminent_prob_val > 0.5) {
+          phase <- "Harvest Imminent"
+        }
+      }
+    }
+
+    # If not harvest imminent, use age-based phase
+    if (phase == "Unknown") {
+      phase <- get_phase_by_age(age_weeks)
+    }
+
+    status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks)
+
+    # Phase transition tracking: increment the counter only when the phase
+    # matches last week's, otherwise restart at 1
+    nmr_weeks_in_phase <- 1
+    if (!is.null(previous_week_csv) && nrow(previous_week_csv) > 0) {
+      prev_row <- which(previous_week_csv$Field_id == field_id)
+      if (length(prev_row) > 0) {
+        prev_phase <- previous_week_csv$`Phase (age based)`[prev_row[1]]
+        if (!is.na(prev_phase) && prev_phase == phase) {
+          prev_weeks <- as.numeric(previous_week_csv$Weeks_in_phase[prev_row[1]])
+          nmr_weeks_in_phase <- if (is.na(prev_weeks)) 1 else prev_weeks + 1
+        }
+      }
+    }
+
+    # Compile result. BUGFIX: check.names = FALSE keeps the literal column
+    # name "Phase (age based)" - default data.frame() mangles it to
+    # "Phase..age.based.", which broke the summary and week-over-week lookups.
+    result <- data.frame(
+      Field_id = field_id,
+      Farm_section = farm_section,
+      Hectares = field_area_ha,
+      Acreage = field_area_acres,
+      CI_mean = mean_ci_current,
+      CI_std = ci_std,
+      CI_range = range_str,
+      CI_change_weekly = weekly_ci_change_str,
+      CI_change_value = weekly_ci_change,
+      CV = cv_current,
+      Age_week = age_weeks,
+      `Phase (age based)` = phase,
+      Germination_progress = germination_progress_str,
+      Status_trigger = status_trigger,
+      Weeks_in_phase = nmr_weeks_in_phase,
+      Imminent_prob = imminent_prob_val,
+      Cloud_pct = cloud_pct,
+      Cloud_category = cloud_cat,
+      stringsAsFactors = FALSE,
+      check.names = FALSE
+    )
+
+    return(result)
+
+  }, error = function(e) {
+    message(paste("Error analyzing field", field_idx, ":", e$message))
+    return(data.frame(
+      Field_id = as.character(field_idx),
+      error = e$message
+    ))
+  })
+}
+
+# ============================================================================
+# SUMMARY GENERATION
+# ============================================================================
+
+#' Aggregate per-field results into a single reporting summary table
+#'
+#' Expects the columns produced by analyze_single_field(): Acreage,
+#' `Phase (age based)`, Status_trigger, Cloud_category. Rows from failed
+#' fields have NA in these columns and are absorbed by the na.rm sums.
+#'
+#' @param field_df Data frame of per-field analysis rows
+#' @return Data frame with Category/Acreage columns formatted for the
+#'   Excel "Summary" sheet; cloud field counts also attached as attributes
+generate_field_analysis_summary <- function(field_df) {
+  message("Generating summary statistics...")
+
+  # Total acreage (needed for all percentages)
+  total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
+
+  # Phase breakdown (acreage per phase)
+  germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
+  tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
+  grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
+  maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
+  unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE)
+
+  # Status trigger breakdown (acreage per trigger)
+  harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
+  stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
+  recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
+  growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
+  germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
+  germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
+  no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
+
+  # Cloud coverage breakdown - COUNT FIELDS, not acreage for cloud analysis
+  clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
+  partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
+  no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
+  total_fields <- nrow(field_df)
+
+  # Cloud acreage for reporting
+  clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
+  partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
+  no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
+
+  # Create summary table; the Category and Acreage vectors are positional -
+  # keep their entries aligned when editing
+  summary_df <- data.frame(
+    Category = c(
+      "=== PHASE DISTRIBUTION ===",
+      "Germination",
+      "Tillering",
+      "Grand Growth",
+      "Maturation",
+      "Unknown Phase",
+      "",
+      "=== STATUS TRIGGERS ===",
+      "Harvest Ready",
+      "Stress Detected",
+      "Strong Recovery",
+      "Growth On Track",
+      "Germination Complete",
+      "Germination Started",
+      "No Trigger",
+      "",
+      "=== CLOUD COVERAGE ===",
+      "Clear View (fields)",
+      "Partial Coverage (fields)",
+      "No Image Available (fields)",
+      "Clear View (acres)",
+      "Partial Coverage (acres)",
+      "No Image Available (acres)",
+      "",
+      "Total Fields",
+      "Total Acreage"
+    ),
+    Acreage = c(
+      NA,
+      round(germination_acreage, 2),
+      round(tillering_acreage, 2),
+      round(grand_growth_acreage, 2),
+      round(maturation_acreage, 2),
+      round(unknown_phase_acreage, 2),
+      NA,
+      NA,
+      round(harvest_ready_acreage, 2),
+      round(stress_acreage, 2),
+      round(recovery_acreage, 2),
+      round(growth_on_track_acreage, 2),
+      round(germination_complete_acreage, 2),
+      round(germination_started_acreage, 2),
+      round(no_trigger_acreage, 2),
+      NA,
+      NA,
+      clear_fields,
+      partial_fields,
+      no_image_fields,
+      round(clear_acreage, 2),
+      round(partial_acreage, 2),
+      round(no_image_acreage, 2),
+      NA,
+      total_fields,
+      round(total_acreage, 2)
+    ),
+    stringsAsFactors = FALSE
+  )
+
+  # Add metadata as attributes (machine-readable cloud counts for callers)
+  attr(summary_df, "cloud_fields_clear") <- clear_fields
+  attr(summary_df, "cloud_fields_partial") <- partial_fields
+  attr(summary_df, "cloud_fields_no_image") <- no_image_fields
+  attr(summary_df, "cloud_fields_total") <- total_fields
+
+  return(summary_df)
+}
+
+# ============================================================================
+# EXPORT FUNCTIONS
+# ============================================================================
+
+#' Export per-field analysis to Excel, RDS and CSV under reports_dir/kpis
+#'
+#' @param field_df Per-field analysis rows
+#' @param summary_df Summary table from generate_field_analysis_summary()
+#' @param project_dir Project name used in file name prefixes
+#' @param current_week Week number used in file name suffixes
+#' @param reports_dir Root reports directory
+#' @return list(excel, rds, csv) of the written file paths
+export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
+  message("Exporting per-field analysis to Excel and RDS...")
+
+  week_tag <- sprintf("%02d", current_week)
+
+  # Ensure kpis/field_analysis exists (recursive also creates kpis/)
+  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
+  if (!dir.exists(output_subdir)) {
+    dir.create(output_subdir, recursive = TRUE)
+  }
+
+  # Excel workbook: per-field rows plus the summary sheet
+  excel_path <- normalizePath(
+    file.path(output_subdir, paste0(project_dir, "_field_analysis_week", week_tag, ".xlsx")),
+    winslash = "\\", mustWork = FALSE
+  )
+  write_xlsx(list("Field Data" = field_df, "Summary" = summary_df), excel_path)
+  message(paste("✓ Field analysis Excel exported to:", excel_path))
+
+  # RDS bundle consumed by the Rmd reports
+  rds_path <- file.path(
+    reports_dir, "kpis",
+    paste0(project_dir, "_kpi_summary_tables_week", week_tag, ".rds")
+  )
+  saveRDS(
+    list(
+      field_analysis = field_df,
+      field_analysis_summary = summary_df,
+      metadata = list(week = current_week, export_date = Sys.Date())
+    ),
+    rds_path
+  )
+  message(paste("✓ Field analysis RDS exported to:", rds_path))
+
+  # CSV copy used for week-over-week field history tracking
+  csv_path <- file.path(output_subdir, paste0(project_dir, "_field_analysis_week", week_tag, ".csv"))
+  write_csv(field_df, csv_path)
+  message(paste("✓ Field analysis CSV exported to:", csv_path))
+
+  return(list(excel = excel_path, rds = rds_path, csv = csv_path))
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+#' Script entry point: parse args, configure parallelism, run the analysis
+#'
+#' Args (positional): [1] end_date YYYY-MM-DD, [2] project_dir.
+#' Both fall back to same-named globals, then to defaults (today / "angata").
+main <- function() {
+  args <- commandArgs(trailingOnly = TRUE)
+
+  end_date <- if (length(args) >= 1 && !is.na(args[1])) {
+    as.Date(args[1])
+  } else if (exists("end_date_str", envir = .GlobalEnv)) {
+    as.Date(get("end_date_str", envir = .GlobalEnv))
+  } else {
+    Sys.Date()
+  }
+
+  project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
+    as.character(args[2])
+  } else if (exists("project_dir", envir = .GlobalEnv)) {
+    get("project_dir", envir = .GlobalEnv)
+  } else {
+    "angata"
+  }
+
+  # IMPORTANT: Assign project_dir BEFORE sourcing parameters_project.R
+  # so that initialize_project() can access it via exists("project_dir")
+  assign("project_dir", project_dir, envir = .GlobalEnv)
+
+  # Load utilities and configuration (in this order - crop_messaging_utils before parameters)
+  # NOTE(review): these sources are expected to define weekly_tile_max, data_dir,
+  # reports_dir, harvesting_data and load_field_boundaries - confirm in parameters_project.R
+  source(here("r_app", "crop_messaging_utils.R"))
+  source(here("r_app", "parameters_project.R"))
+
+  message("=== FIELD ANALYSIS WEEKLY (TILE-AWARE, PARALLEL) ===")
+  message(paste("Date:", end_date))
+  message(paste("Project:", project_dir))
+
+  # ISO-8601 week number (%V) and calendar year
+  current_week <- as.numeric(format(end_date, "%V"))
+  year <- as.numeric(format(end_date, "%Y"))
+
+  message(paste("Week:", current_week, "/ Year:", year))
+
+  # Build tile grid from available tiles
+  message("Building tile grid from available weekly tiles...")
+  tile_grid <- build_tile_grid(weekly_tile_max, current_week, year)
+  message(paste(" Found", nrow(tile_grid), "tiles for analysis"))
+
+  # Load field boundaries
+  tryCatch({
+    boundaries_result <- load_field_boundaries(data_dir)
+
+    # load_field_boundaries returns a list with field_boundaries_sf and field_boundaries
+    if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
+      field_boundaries_sf <- boundaries_result$field_boundaries_sf
+    } else {
+      field_boundaries_sf <- boundaries_result
+    }
+
+    # Check if field_boundaries_sf is valid
+    if (!is.data.frame(field_boundaries_sf) && !inherits(field_boundaries_sf, "sf")) {
+      stop("Field boundaries is not a valid sf object or data frame")
+    }
+
+    if (nrow(field_boundaries_sf) == 0) {
+      stop("Field boundaries loaded but contains 0 rows")
+    }
+
+    message(paste(" Loaded", nrow(field_boundaries_sf), "fields from boundaries"))
+  }, error = function(e) {
+    stop("ERROR loading field boundaries: ", e$message,
+         "\nCheck that pivot.geojson exists in ", data_dir)
+  })
+
+  # Load previous week data for phase tracking
+  previous_week_result <- load_previous_week_csv(project_dir, current_week, reports_dir)
+  previous_week_csv <- if (previous_week_result$found) previous_week_result$data else NULL
+
+  # Load planting dates
+  planting_dates <- extract_planting_dates(harvesting_data)
+
+  # === PARALLEL PROCESSING SETUP ===
+  message("Setting up parallel processing...")
+
+  # Respect a plan the caller already configured; otherwise set one up
+  current_plan <- class(future::plan())[1]
+  if (current_plan == "sequential") {
+    # BUGFIX: detectCores() can return NA, and on a single-core machine
+    # "cores - 1" would request 0 workers; clamp to at least 1.
+    detected_cores <- parallel::detectCores()
+    if (is.na(detected_cores)) detected_cores <- 2
+    num_workers <- max(1, detected_cores - 1)
+    message(paste(" Using", num_workers, "workers for parallel processing"))
+    future::plan(future::multisession, workers = num_workers)
+  } else {
+    message(paste(" Using existing plan:", current_plan))
+  }
+
+  # === PARALLEL FIELD ANALYSIS ===
+  message("Analyzing fields in parallel...")
+
+  # Map over all fields using furrr (parallel version of map)
+  field_analysis_list <- furrr::future_map(
+    seq_len(nrow(field_boundaries_sf)),
+    ~ analyze_single_field(
+      field_idx = .,
+      field_boundaries_sf = field_boundaries_sf,
+      tile_grid = tile_grid,
+      week_num = current_week,
+      year = year,
+      mosaic_dir = weekly_tile_max,
+      previous_week_csv = previous_week_csv,
+      planting_dates = planting_dates,
+      report_date = end_date,
+      harvest_imminence_data = NULL # Optional: add if available
+    ),
+    .progress = TRUE,
+    .options = furrr::furrr_options(seed = TRUE)
+  )
+
+  # Bind list of data frames into single data frame (error rows padded with NA)
+  field_analysis_df <- dplyr::bind_rows(field_analysis_list)
+
+  if (nrow(field_analysis_df) == 0) {
+    stop("No fields analyzed successfully!")
+  }
+
+  message(paste("✓ Analyzed", nrow(field_analysis_df), "fields"))
+
+  # Generate summary
+  summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
+
+  # Export results
+  export_paths <- export_field_analysis_excel(
+    field_analysis_df,
+    summary_statistics_df,
+    project_dir,
+    current_week,
+    reports_dir
+  )
+
+  # Print summary
+  cat("\n=== FIELD ANALYSIS SUMMARY ===\n")
+  cat("Fields analyzed:", nrow(field_analysis_df), "\n")
+  cat("Excel export:", export_paths$excel, "\n")
+  cat("RDS export:", export_paths$rds, "\n")
+  cat("CSV export:", export_paths$csv, "\n\n")
+
+  cat("--- Per-field results (first 10) ---\n")
+  # Select only columns that exist to avoid print errors
+  available_cols <- c("Field_id", "Acreage", "Age_week", "CI_mean", "Status_trigger", "Cloud_category")
+  available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
+  if (length(available_cols) > 0) {
+    print(head(field_analysis_df[, available_cols], 10))
+  } else {
+    print(head(field_analysis_df, 10))
+  }
+
+  cat("\n--- Summary statistics ---\n")
+  print(summary_statistics_df)
+
+  message("\n✓ Field analysis complete!")
+}
+
+# Run only when executed as a top-level script (e.g. via Rscript);
+# sourcing this file for its helper functions does not trigger main().
+if (sys.nframe() == 0) {
+  main()
+}
diff --git a/r_app/10_CI_report_with_kpis_simple.Rmd b/r_app/10_CI_report_with_kpis_simple.Rmd
index b2e71ca..710d794 100644
--- a/r_app/10_CI_report_with_kpis_simple.Rmd
+++ b/r_app/10_CI_report_with_kpis_simple.Rmd
@@ -41,7 +41,6 @@ suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
- library(exactextractr)
library(tidyverse)
library(tmap)
library(lubridate)
diff --git a/r_app/91_CI_report_with_kpis_Angata.Rmd b/r_app/91_CI_report_with_kpis_Angata.Rmd
index 40f2108..f50bb23 100644
--- a/r_app/91_CI_report_with_kpis_Angata.Rmd
+++ b/r_app/91_CI_report_with_kpis_Angata.Rmd
@@ -41,7 +41,6 @@ suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
- library(exactextractr)
library(tidyverse)
library(tmap)
library(lubridate)
diff --git a/r_app/ci_extraction_utils.R b/r_app/ci_extraction_utils.R
index 4ca7cc0..99a1fee 100644
--- a/r_app/ci_extraction_utils.R
+++ b/r_app/ci_extraction_utils.R
@@ -2,6 +2,10 @@
# =====================
# Utility functions for the SmartCane CI (Chlorophill Index) extraction workflow.
# These functions support date handling, raster processing, and data extraction.
+# Includes parallel tile processing using furrr for memory efficiency.
+#
+# Parallel Processing: Tile-based extraction uses furrr::future_map to process
+# multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores)
#' Safe logging function that works whether log_message exists or not
#'
@@ -75,12 +79,22 @@ detect_raster_structure <- function(loaded_raster) {
# Determine raster type and structure
if (n_bands == 4) {
+ # 4-band optimized: RGB + NIR (cloud-masked server-side by Planet)
return(list(
type = "4b",
has_udm = FALSE,
band_names = c("Red", "Green", "Blue", "NIR"),
red_idx = 1, green_idx = 2, blue_idx = 3, nir_idx = 4, udm_idx = NA
))
+ } else if (n_bands == 5) {
+ # 4-band + alpha channel (may be added by GDAL merge process)
+ # Alpha channel is ignored, we use first 4 bands
+ return(list(
+ type = "4b",
+ has_udm = FALSE,
+ band_names = c("Red", "Green", "Blue", "NIR", "Alpha"),
+ red_idx = 1, green_idx = 2, blue_idx = 3, nir_idx = 4, udm_idx = NA
+ ))
} else if (n_bands %in% c(8, 9)) {
# PlanetScope 8-band structure:
# 1=Coastal Blue, 2=Blue, 3=Green I, 4=Green, 5=Yellow, 6=Red, 7=Red Edge, 8=NIR
@@ -98,7 +112,7 @@ detect_raster_structure <- function(loaded_raster) {
))
} else {
stop(paste("Unexpected number of bands:", n_bands,
- "Expected 4-band, 8-band, or 9-band (8-band + UDM) data"))
+ "Expected 4-band, 4-band+alpha, 8-band, or 9-band data"))
}
}
@@ -149,17 +163,8 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
stop("Field boundaries are required but were not provided")
}
- # CRITICAL: Convert field_boundaries to terra if it's an sf object
- # This ensures all subsequent terra operations work correctly
- # But if it's already a terra object or conversion fails, use as-is
- if (inherits(field_boundaries, "sf")) {
- field_boundaries <- tryCatch({
- terra::vect(field_boundaries)
- }, error = function(e) {
- warning(paste("Could not convert sf to terra:", e$message, "- using sf object directly"))
- field_boundaries # Return original sf object
- })
- }
+ # Note: No conversion needed - sf::st_bbox() works with both sf and terra objects
+ # field_boundaries is used only for spatial reference (bounding box)
# Establish file names for output
basename_no_ext <- tools::file_path_sans_ext(basename(file))
@@ -440,12 +445,30 @@ extract_rasters_daily <- function(file, field_geojson, quadrants = TRUE, save_di
stop("CI band not found in raster")
}
- # Extract statistics
- pivot_stats <- cbind(
- field_geojson,
- mean_CI = round(exactextractr::exact_extract(x$CI, field_geojson, fun = "mean"), 2)
- ) %>%
- sf::st_drop_geometry() %>%
+ # Get raster info for logging (dimensions and memory scale)
+ raster_cells <- terra::ncell(x$CI)
+ raster_size_mb <- (raster_cells * 8) / (1024 * 1024) # approximate MB for double precision
+ safe_log(paste(" Raster size:", format(raster_cells, big.mark=","), "cells (~",
+ round(raster_size_mb, 1), "MB)"))
+
+ # Crop raster to field boundaries extent BEFORE extraction
+ # This reduces memory usage by working with a smaller spatial subset
+ field_bbox <- sf::st_bbox(field_geojson)
+ x_cropped <- terra::crop(x, terra::ext(field_bbox), snap = "out")
+
+ cropped_cells <- terra::ncell(x_cropped$CI)
+ cropped_mb <- (cropped_cells * 8) / (1024 * 1024)
+ safe_log(paste(" After crop:", format(cropped_cells, big.mark=","),
+ "cells (~", round(cropped_mb, 1), "MB)"))
+
+ # Extract statistics using terra::extract (memory-efficient, works with sf directly)
+ # terra::extract returns a data.frame with ID (row numbers) and extracted values
+ extracted_vals <- terra::extract(x_cropped$CI, field_geojson, fun = "mean", na.rm = TRUE)
+
+ # Build result matching expected format (field, sub_field, date columns)
+ pivot_stats <- field_geojson %>%
+ sf::st_drop_geometry() %>%
+ mutate(mean_CI = round(extracted_vals[, 2], 2)) %>%
dplyr::rename("{date}" := mean_CI)
# Determine save path
@@ -507,11 +530,20 @@ combine_ci_values <- function(daily_CI_vals_dir, output_file = NULL) {
#'
update_ci_data <- function(new_data, existing_data_file) {
if (!file.exists(existing_data_file)) {
- warning(paste("Existing data file not found:", existing_data_file))
+ # File doesn't exist - create it with new data
+ safe_log(paste("Creating new CI data file:", existing_data_file))
+
+ # Ensure directory exists
+ dir.create(dirname(existing_data_file), recursive = TRUE, showWarnings = FALSE)
+
+ # Save new data
+ saveRDS(new_data, existing_data_file)
+ safe_log(paste("New CI data file created:", existing_data_file))
+
return(new_data)
}
- # Load existing data
+ # File exists - load existing data and combine
existing_data <- readRDS(existing_data_file)
# Combine data, handling duplicates by keeping the newer values
@@ -614,7 +646,328 @@ process_ci_values <- function(dates, field_boundaries, merged_final_dir,
dplyr::group_by(sub_field)
# Update the combined data file
- update_ci_data(new_pivot_stats, combined_ci_path)
+ update_ci_data(new_data = new_pivot_stats, existing_data_file = combined_ci_path)
safe_log("CI values from latest images added to combined_CI_data.rds")
}
}
+#' Process CI values from pre-split tiles (Script 01 output)
+#'
+#' This function processes CI values from tiles instead of full-extent rasters.
+#' Tiles are created by Script 01 and stored in daily_tiles_split/[DATE]/ folders.
+#' For each field, it aggregates CI statistics from all tiles that intersect that field.
+#'
+#' @param dates List of dates from date_list()
+#' @param tile_folder Path to the tile folder (daily_tiles_split)
+#' @param field_boundaries Field boundaries as vector object
+#' @param field_boundaries_sf Field boundaries as SF object
+#' @param daily_CI_vals_dir Directory to save daily CI values
+#' @param cumulative_CI_vals_dir Directory to save cumulative CI values
+#' @param merged_final_dir Directory to save processed tiles with CI band
+#' @return NULL (used for side effects)
+#'
+process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries,
+ field_boundaries_sf, daily_CI_vals_dir,
+ cumulative_CI_vals_dir, merged_final_dir) {
+
+ # Define path for combined CI data
+ combined_ci_path <- here::here(cumulative_CI_vals_dir, "combined_CI_data.rds")
+
+ # Discover all dates with tiles
+ tile_dates <- list.dirs(tile_folder, full.names = FALSE, recursive = FALSE)
+ tile_dates <- tile_dates[tile_dates != "master_grid_5x5.geojson"] # Remove non-date entries
+ tile_dates <- sort(tile_dates)
+
+ # Filter to dates in current processing range
+ dates_to_process <- tile_dates[tile_dates %in% dates$days_filter]
+
+ if (length(dates_to_process) == 0) {
+ safe_log("No tile dates found in processing date range", "WARNING")
+ return(invisible(NULL))
+ }
+
+ safe_log(paste("Found", length(dates_to_process), "date(s) with tiles"))
+
+ # Check if the combined CI data file exists
+ if (!file.exists(combined_ci_path)) {
+ safe_log("combined_CI_data.rds does not exist. Creating new file with all available tile data.")
+
+ # Process all tile dates
+ all_pivot_stats <- list()
+
+ for (date in tile_dates) {
+ safe_log(paste("Processing tiles for date:", date))
+
+ date_tile_dir <- file.path(tile_folder, date)
+ tile_files <- list.files(date_tile_dir, pattern = "\\.tif$", full.names = TRUE)
+
+ if (length(tile_files) == 0) {
+ safe_log(paste(" No tile files found for", date), "WARNING")
+ next
+ }
+
+ safe_log(paste(" Found", length(tile_files), "tiles for date", date))
+
+ # Process all tiles for this date and aggregate to fields
+ date_stats <- extract_ci_from_tiles(
+ tile_files = tile_files,
+ date = date,
+ field_boundaries_sf = field_boundaries_sf,
+ daily_CI_vals_dir = daily_CI_vals_dir,
+ merged_final_tif_dir = merged_final_dir
+ )
+
+ if (!is.null(date_stats)) {
+ all_pivot_stats[[date]] <- date_stats
+ }
+ }
+
+ # Combine all dates
+ if (length(all_pivot_stats) > 0) {
+ # Use bind_rows() to handle varying column names across dates gracefully
+ combined_stats <- dplyr::bind_rows(all_pivot_stats, .id = NULL)
+ rownames(combined_stats) <- NULL
+
+ # Save combined data
+ saveRDS(combined_stats, combined_ci_path)
+ safe_log("All tile CI values extracted and combined_CI_data.rds created")
+ } else {
+ safe_log("No tile data was processed", "WARNING")
+ }
+
+ } else {
+ # Process only new dates
+ safe_log("combined_CI_data.rds exists, adding new tile data.")
+
+ new_pivot_stats_list <- list()
+
+ for (date in dates_to_process) { # NOTE(review): dropped leftover [1:2] debug limit — it skipped all but 2 new dates and produced an NA iteration when <2 dates existed
+ safe_log(paste("Processing tiles for date:", date))
+
+ date_tile_dir <- file.path(tile_folder, date)
+ tile_files <- list.files(date_tile_dir, pattern = "\\.tif$", full.names = TRUE)
+
+ if (length(tile_files) == 0) {
+ safe_log(paste(" No tile files found for", date), "WARNING")
+ next
+ }
+
+ safe_log(paste(" Found", length(tile_files), "tiles for date", date))
+
+ # Extract CI from tiles for this date
+ date_stats <- extract_ci_from_tiles(
+ tile_files = tile_files,
+ date = date,
+ field_boundaries_sf = field_boundaries_sf,
+ daily_CI_vals_dir = daily_CI_vals_dir,
+ merged_final_tif_dir = merged_final_dir
+ )
+
+ if (!is.null(date_stats)) {
+ new_pivot_stats_list[[date]] <- date_stats
+ }
+ }
+
+ # Combine new data
+ if (length(new_pivot_stats_list) > 0) {
+ # Use bind_rows() to handle varying column names across dates gracefully
+ new_pivot_stats <- dplyr::bind_rows(new_pivot_stats_list, .id = NULL)
+ rownames(new_pivot_stats) <- NULL
+
+ # Update combined file
+ update_ci_data(new_pivot_stats, combined_ci_path)
+ safe_log("Tile CI values from new dates added to combined_CI_data.rds")
+ } else {
+ safe_log("No new tile dates had valid data", "WARNING")
+ }
+ }
+}
+
+#' Process a single tile file, extract CI, save processed tile, and extract statistics
+#'
+#' Helper function for parallel processing of tiles. For each tile:
+#' 1. Loads tile
+#' 2. Creates/extracts CI band
+#' 3. Creates output raster with Red, Green, Blue, NIR, CI bands
+#' 4. Saves to merged_final_tif_dir/[DATE]/ mirroring daily_tiles_split structure
+#' 5. Extracts field-level CI statistics
+#' Returns statistics aggregated to field level.
+#'
+#' @param tile_file Path to a single tile TIF file
+#' @param field_boundaries_sf Field boundaries as SF object
+#' @param date Character string of the date (YYYY-MM-DD format)
+#' @param merged_final_tif_dir Directory to save processed tiles with CI band
+#' @return Data frame with field CI statistics for this tile, or NULL if processing failed
+#'
+process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir) {
+ tryCatch({
+ tile_filename <- basename(tile_file)
+ safe_log(paste(" [TILE] Loading:", tile_filename))
+
+ # Load tile
+ tile_rast <- terra::rast(tile_file)
+
+ # Determine if this is 4-band or 8-band data
+ raster_info <- detect_raster_structure(tile_rast)
+
+ # Extract the bands we need
+ red_band <- tile_rast[[raster_info$red_idx]]
+ green_band <- tile_rast[[raster_info$green_idx]]
+ blue_band <- tile_rast[[raster_info$blue_idx]]
+ nir_band <- tile_rast[[raster_info$nir_idx]]
+
+ # Apply cloud masking if UDM exists
+ if (raster_info$has_udm) {
+ udm_band <- tile_rast[[raster_info$udm_idx]]
+ cloud_mask <- udm_band != 2 # Mask only UDM=2 (clouds)
+
+ red_band[!cloud_mask] <- NA
+ green_band[!cloud_mask] <- NA
+ blue_band[!cloud_mask] <- NA
+ nir_band[!cloud_mask] <- NA
+ }
+
+ # Name the bands
+ names(red_band) <- "Red"
+ names(green_band) <- "Green"
+ names(blue_band) <- "Blue"
+ names(nir_band) <- "NIR"
+
+ # Create CI band
+ if (raster_info$type == "4b") {
+ ci_band <- (nir_band - red_band) / (nir_band + red_band)
+ } else if (raster_info$type == "8b") {
+ red_edge <- tile_rast[[raster_info$red_idx]] # NOTE(review): red_idx is the Red band (band 6 in 8-band data), but the formula/comment intend Red Edge (band 7) — confirm detect_raster_structure() exposes a red-edge index and use it here
+ ci_band <- (nir_band - red_edge) / (nir_band + red_edge)
+ }
+ names(ci_band) <- "CI"
+
+ # Create output raster with Red, Green, Blue, NIR, CI
+ output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band)
+ names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI")
+
+ # Save processed tile to merged_final_tif_dir/[DATE]/ with same filename
+ date_dir <- file.path(merged_final_tif_dir, date)
+ if (!dir.exists(date_dir)) {
+ dir.create(date_dir, recursive = TRUE, showWarnings = FALSE)
+ }
+
+ # Use same filename as source tile (e.g., 2026-01-02_01.tif)
+ tile_filename <- basename(tile_file)
+ output_file <- file.path(date_dir, tile_filename)
+
+ # Write processed tile
+ terra::writeRaster(output_raster, output_file, overwrite = TRUE)
+ safe_log(paste(" [SAVED TIFF] Output:", file.path(date, basename(output_file)), "(5 bands: Red, Green, Blue, NIR, CI)"))
+
+ # Extract statistics per field from CI band
+ field_bbox <- sf::st_bbox(field_boundaries_sf)
+ ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
+ extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
+
+ # Build statistics data frame for this tile
+ tile_stats <- field_boundaries_sf %>%
+ sf::st_drop_geometry() %>%
+ mutate(mean_CI = round(extracted_vals[, 2], 2)) %>%
+ mutate(tile_file = basename(tile_file))
+
+ return(tile_stats)
+
+ }, error = function(e) {
+ safe_log(paste(" Error processing tile", basename(tile_file), "-", e$message), "WARNING")
+ return(NULL)
+ })
+}
+
+#' Extract CI values from multiple tiles and aggregate to fields
+#'
+#' Given a set of tile files for a single date, this function:
+#' 1. Loads each tile IN PARALLEL using furrr
+#' 2. Creates/extracts CI band
+#' 3. Saves processed tile (Red, Green, Blue, NIR, CI) to merged_final_tif_dir/[DATE]/
+#' 4. Calculates field statistics from CI band
+#' 5. Aggregates field statistics across tiles
+#' 6. Saves individual date file (matching legacy workflow)
+#'
+#' Parallel processing: Uses future_map to process 2-4 tiles simultaneously depending on available cores.
+#'
+#' @param tile_files Character vector of full paths to tile TIF files
+#' @param date Character string of the date (YYYY-MM-DD format)
+#' @param field_boundaries_sf Field boundaries as SF object
+#' @param daily_CI_vals_dir Directory to save individual date RDS files
+#' @param merged_final_tif_dir Directory to save processed tiles with CI band (mirrors daily_tiles_split structure)
+#' @return Data frame with field CI statistics for the date
+#'
+extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL) {
+
+ if (!inherits(field_boundaries_sf, "sf")) {
+ field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
+ }
+
+ safe_log(paste(" Processing", length(tile_files), "tiles for date", date, "(parallel processing)"))
+
+ # Process tiles in parallel using furrr::future_map
+ # This replaces the sequential for loop, processing 2-4 tiles simultaneously
+ stats_list <- furrr::future_map(
+ .x = tile_files,
+ .f = ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir),
+ .options = furrr::furrr_options(seed = TRUE)
+ )
+
+ # Extract names and filter out NULL results (failed tiles)
+ tile_names <- basename(tile_files)
+ all_stats <- stats_list[!sapply(stats_list, is.null)]
+ names(all_stats) <- tile_names[!sapply(stats_list, is.null)]
+
+ if (length(all_stats) == 0) {
+ return(NULL)
+ }
+
+ # Combine all tiles and aggregate to field level
+ # Use dplyr::bind_rows() to handle column name inconsistencies gracefully
+ combined_tiles <- dplyr::bind_rows(all_stats)
+ rownames(combined_tiles) <- NULL
+
+ # Aggregate: For each field, compute mean CI across all tiles
+ aggregated <- combined_tiles %>%
+ group_by(across(-c(mean_CI, tile_file))) %>%
+ summarise(!!date := round(mean(mean_CI, na.rm = TRUE), 2), .groups = "drop")
+
+ # Save individual date file (matching legacy workflow format: extracted_YYYY-MM-DD_quadrant.rds)
+ if (!is.null(daily_CI_vals_dir)) {
+ save_path <- file.path(daily_CI_vals_dir, paste0("extracted_", date, "_quadrant.rds"))
+ saveRDS(aggregated, save_path)
+ safe_log(paste0("[RDS SAVED] Date: ", date, " -> File: extracted_", date, "_quadrant.rds")) # paste0 avoids spurious spaces inside the logged filename
+ }
+
+ return(aggregated)
+}
+
+#' Create CI band from available bands (if not pre-computed)
+#'
+#' @param raster Loaded raster object
+#' @param raster_info Output from detect_raster_structure()
+#' @return Single-layer raster with CI band
+#'
+create_ci_band <- function(raster, raster_info) {
+ if (raster_info$type == "4b") {
+ # Calculate NDVI for 4-band data: (NIR - Red) / (NIR + Red)
+ red <- raster[[raster_info$red_idx]]
+ nir <- raster[[raster_info$nir_idx]]
+ ci <- (nir - red) / (nir + red)
+ } else if (raster_info$type == "8b") {
+ # Use RedEdge for 8-band data: (NIR - RedEdge) / (NIR + RedEdge)
+ red_edge <- raster[[raster_info$red_idx]] # NOTE(review): same as process_single_tile — red_idx is the Red band, not Red Edge; confirm a dedicated red-edge index exists in detect_raster_structure()
+ nir <- raster[[raster_info$nir_idx]]
+ ci <- (nir - red_edge) / (nir + red_edge)
+ } else {
+ stop("Unsupported raster type")
+ }
+
+ # Apply cloud mask if available (UDM band)
+ if (!is.na(raster_info$udm_idx)) {
+ udm <- raster[[raster_info$udm_idx]]
+ ci <- terra::mask(ci, udm, maskvalues = 0)
+ }
+
+ return(ci)
+}
\ No newline at end of file
diff --git a/r_app/mosaic_creation_tile_utils.R b/r_app/mosaic_creation_tile_utils.R
new file mode 100644
index 0000000..3a3af35
--- /dev/null
+++ b/r_app/mosaic_creation_tile_utils.R
@@ -0,0 +1,286 @@
+# MOSAIC_CREATION_TILE_UTILS.R
+# ============================
+# Tile-based processing utilities for scalable weekly mosaic creation
+#
+# STRATEGY: Memory-efficient, scalable approach for large study areas
+# Split daily full-extent mosaics into fixed-size tiles (e.g., 5×5 km),
+# then process each tile position across all days, and reassemble.
+#
+# WORKFLOW:
+# Daily full-extent TIFF
+# ↓
+# Split into N×M tiles (based on area size / tile_size)
+# E.g., 50×80 km area with 5 km tiles = 10×16 = 160 tiles
+# ↓
+# For EACH TILE POSITION (1 to 160):
+# - Load that tile from all 7 days
+# - Create MAX composite for that tile
+# - Save tile_NNN_MAX.tif
+# ↓
+# Reassemble all 160 MAX tiles back into full-extent
+# ↓
+# Save weekly full-extent mosaic
+#
+# KEY BENEFIT: Memory usage ~50 MB per tile (5×5 km), not 1.3 GB (full 50×80 km)
+# Scales automatically: bigger area = more tiles, same memory per tile
+#
+# TILE_SIZE: Configurable (default 5000 m = 5 km, adjustable via parameter)
+
+#' Simple tile-based processing using terra::makeTiles()
+#'
+#' Calculates tile grid based on desired tile SIZE (e.g., 5 km), not grid count.
+#' This makes it SCALABLE: bigger area = more tiles automatically.
+#'
+#' @param raster_path Path to raster to split
+#' @param output_dir Directory to save tiles
+#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km)
+#' @return List with tiles (file paths) and metadata
+#'
+split_raster_into_tiles <- function(raster_path, output_dir, tile_size_m = 5000) {
+ # Load raster
+ r <- terra::rast(raster_path)
+
+ # Calculate how many tiles we need based on raster extent and tile size
+ x_range <- terra::ext(r)$xmax - terra::ext(r)$xmin
+ y_range <- terra::ext(r)$ymax - terra::ext(r)$ymin
+
+ n_tiles_x <- ceiling(x_range / tile_size_m)
+ n_tiles_y <- ceiling(y_range / tile_size_m)
+
+ safe_log(paste("Splitting into", n_tiles_x, "×", n_tiles_y, "tiles of",
+ tile_size_m / 1000, "km"))
+
+ # Create output directory
+ dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+
+ # Use terra::makeTiles() - it splits based on n = c(rows, cols)
+ tiles <- terra::makeTiles(
+ r,
+ n = c(n_tiles_y, n_tiles_x), # rows, cols
+ filename = file.path(output_dir, "tile_.tif"),
+ overwrite = TRUE
+ )
+
+ safe_log(paste("Created", length(tiles), "tile files"))
+
+ return(list(
+ tiles = tiles,
+ n_tiles = length(tiles),
+ n_tiles_x = n_tiles_x,
+ n_tiles_y = n_tiles_y,
+ extent = terra::ext(r),
+ raster = raster_path
+ ))
+}
+
+
+#' Create weekly MAX mosaic using TRUE tile-based processing
+#'
+#' TILE-BASED WORKFLOW (Memory efficient):
+#' 1. Calculate tile grid dynamically from extent and FIXED tile_size (e.g., 5 km)
+#' 2. For EACH TILE across all 7 days:
+#' - Load that tile from each daily file (small ~50 MB, not 1.3 GB)
+#' - Create MAX composite for just that tile
+#' 3. Reassemble all tiles into final full-extent mosaic
+#'
+#' SCALABILITY: Fixed 5 km tile size means bigger area = more tiles (automatic scaling)
+#'
+#' @param dates List from date_list() with days_filter
+#' @param merged_final_dir Directory with daily merged full-extent TIFFs
+#' @param final_output_dir Directory to store final reassembled mosaic
+#' @param file_name_tif Output filename for final mosaic
+#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km). Bigger area automatically gets more tiles.
+#' @return Path to final reassembled weekly mosaic
+#'
+create_weekly_mosaic_tiled <- function(dates, merged_final_dir,
+ final_output_dir, file_name_tif,
+ tile_size_m = 5000) {
+
+ # Get daily files for this week
+ daily_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")
+ daily_files <- daily_files[grepl(paste(dates$days_filter, collapse = "|"), daily_files)]
+
+ if (length(daily_files) == 0) {
+ safe_log("No daily files found for this week", "ERROR")
+ return(NULL)
+ }
+
+ safe_log(paste("Found", length(daily_files), "daily files for week", dates$week))
+
+ # Load first daily file to get extent and calculate tile grid
+ safe_log("Step 1: Loading first daily file to establish tile structure")
+ first_raster <- terra::rast(daily_files[1])
+
+ # Get CRS and convert tile_size_m to appropriate units
+ raster_crs <- terra::crs(first_raster)
+
+ # If raster is in lat/lon (geographic), convert tile_size_m to degrees
+ # 1 degree latitude ≈ 111 km, so 5000 m ≈ 0.045 degrees
+ # Check for GEOGCRS (geographic coordinate system) in WKT string
+ is_geographic <- grepl("GEOGCRS|longlat|geographic|ANGLEUNIT.*degree",
+ as.character(raster_crs), ignore.case = TRUE)
+
+ if (is_geographic) {
+ # Geographic CRS - convert meters to degrees
+ # At equator: 1 degree ≈ 111 km
+ tile_size_degrees <- tile_size_m / 111000 # 111 km per degree
+ safe_log(paste("Raster is in geographic CRS (lat/lon). Converting", tile_size_m / 1000,
+ "km to", round(tile_size_degrees, 4), "degrees"))
+ } else {
+ # Projected CRS - use meters directly
+ tile_size_degrees <- tile_size_m
+ safe_log(paste("Raster is in projected CRS. Using", tile_size_m / 1000, "km directly"))
+ }
+
+ # Calculate n_tiles dynamically from extent and tile_size
+ x_range <- terra::ext(first_raster)$xmax - terra::ext(first_raster)$xmin
+ y_range <- terra::ext(first_raster)$ymax - terra::ext(first_raster)$ymin
+
+ n_tiles_x <- ceiling(x_range / tile_size_degrees)
+ n_tiles_y <- ceiling(y_range / tile_size_degrees)
+ n_tiles_total <- n_tiles_x * n_tiles_y
+
+ safe_log(paste("Step 2: Creating tile grid:", tile_size_m / 1000, "km tiles"))
+ safe_log(paste(" Extent:", round(x_range, 4), "° ×", round(y_range, 4), "°"))
+ safe_log(paste(" Grid:", n_tiles_y, "rows ×", n_tiles_x, "columns =", n_tiles_total, "tiles"))
+
+ # Calculate tile extents mathematically (no need to save temp files)
+ extent <- terra::ext(first_raster)
+ x_min <- extent$xmin
+ y_min <- extent$ymin
+
+ # Create list of tile extents
+ tile_extents <- list()
+ tile_idx <- 0
+
+ for (row in 1:n_tiles_y) {
+ for (col in 1:n_tiles_x) {
+ tile_idx <- tile_idx + 1
+
+ # Calculate this tile's bounds
+ tile_xmin <- x_min + (col - 1) * tile_size_degrees
+ tile_xmax <- min(tile_xmin + tile_size_degrees, extent$xmax)
+ tile_ymin <- y_min + (row - 1) * tile_size_degrees
+ tile_ymax <- min(tile_ymin + tile_size_degrees, extent$ymax)
+
+ tile_extents[[tile_idx]] <- terra::ext(tile_xmin, tile_xmax, tile_ymin, tile_ymax)
+ }
+ }
+
+ safe_log(paste("Calculated", length(tile_extents), "tile extents"))
+
+ # Save tiles to Laravel storage directory
+ tile_storage_dir <- file.path("laravel_app", "storage", "app", "angata", "daily_tiles") # NOTE(review): project name "angata" is hard-coded in a shared utility — consider passing the project/storage root as a parameter
+ dir.create(tile_storage_dir, recursive = TRUE, showWarnings = FALSE)
+
+ # For each tile, load from all daily files and create MAX
+ safe_log("Step 3: Processing tiles (load per-tile across all days, create MAX for each)")
+ tile_files_list <- list()
+
+ for (tile_idx in seq_along(tile_extents)) {
+ if (tile_idx %% max(1, ceiling(n_tiles_total / 10)) == 0 || tile_idx == 1) {
+ safe_log(paste(" Processing tile", tile_idx, "of", n_tiles_total))
+ }
+
+ # Get this tile's extent
+ tile_extent <- tile_extents[[tile_idx]]
+
+ # Load and crop all daily files to this tile extent
+ daily_tile_data <- list()
+ for (file_idx in seq_along(daily_files)) {
+ tryCatch({
+ r <- terra::rast(daily_files[file_idx])
+ cropped <- terra::crop(r, tile_extent, snap = "in")
+ daily_tile_data[[file_idx]] <- cropped # NOTE(review): if an earlier crop fails, this leaves a NULL hole at that index — compact with Filter(Negate(is.null), daily_tile_data) before terra::sprc()
+ }, error = function(e) {
+ safe_log(paste("Warning: Could not crop tile", tile_idx, "from",
+ basename(daily_files[file_idx])), "WARNING")
+ })
+ }
+
+ if (length(daily_tile_data) == 0) {
+ safe_log(paste("No valid data for tile", tile_idx), "WARNING")
+ next
+ }
+
+ # Create MAX composite for this tile
+ if (length(daily_tile_data) == 1) {
+ tile_max <- daily_tile_data[[1]]
+ } else {
+ collection <- terra::sprc(daily_tile_data)
+ tile_max <- terra::mosaic(collection, fun = "max")
+ }
+
+ # Save tile to disk (keeps memory low)
+ tile_file <- file.path(tile_storage_dir, sprintf("tile_%04d.tif", tile_idx))
+ terra::writeRaster(tile_max, tile_file, overwrite = TRUE)
+ tile_files_list[[tile_idx]] <- tile_file
+ }
+
+ if (length(tile_files_list) == 0) {
+ safe_log("No tiles processed successfully", "ERROR")
+ return(NULL)
+ }
+
+ safe_log(paste("Step 4: Reassembling", length(tile_files_list), "tiles from disk into full-extent mosaic"))
+
+ # Load all tile files and reassemble
+ tile_rasters <- lapply(tile_files_list, terra::rast)
+ collection <- terra::sprc(tile_rasters)
+ final_mosaic <- terra::mosaic(collection, fun = "first")
+
+ safe_log("Step 5: Saving final reassembled mosaic")
+
+ # Save
+ dir.create(final_output_dir, recursive = TRUE, showWarnings = FALSE)
+ final_file_path <- file.path(final_output_dir, file_name_tif)
+
+ tryCatch({
+ terra::writeRaster(final_mosaic, final_file_path, overwrite = TRUE)
+ safe_log(paste("✓ Weekly mosaic saved to:", final_file_path))
+ }, error = function(e) {
+ safe_log(paste("Error saving mosaic:", e$message), "ERROR")
+ return(NULL) # NOTE(review): this returns from the error-handler closure only — the outer function still falls through to cleanup and returns final_file_path even when the save failed; capture a success flag from tryCatch and return NULL explicitly
+ })
+
+ # Cleanup temporary tile files
+ unlink(tile_storage_dir, recursive = TRUE)
+ safe_log("Cleaned up temporary tile files")
+
+ return(final_file_path)
+}
+
+#' Load tile MAX rasters for a specific week (for per-tile analysis)
+#'
+#' @param week Week number
+#' @param tile_dir Directory containing tile mosaics (week subdirectories)
+#' @return List of tile rasters with tile_id metadata
+#'
+load_tile_mosaics <- function(week, tile_dir) {
+ week_dir <- file.path(tile_dir, paste0("week_", week))
+
+ if (!dir.exists(week_dir)) {
+ warning(paste("Tile directory not found:", week_dir))
+ return(NULL)
+ }
+
+ # Load all tile files
+ tile_files <- list.files(week_dir, pattern = "^tile_.*\\.tif$", full.names = TRUE)
+
+ if (length(tile_files) == 0) {
+ warning("No tile files found in: ", week_dir) # warning() concatenates args with no separator, so the space must be explicit
+ return(NULL)
+ }
+
+ # Sort by tile ID
+ tile_numbers <- as.numeric(gsub(".*tile_(\\d+).*", "\\1", tile_files))
+ tile_files <- tile_files[order(tile_numbers)]
+
+ # Load rasters
+ tiles_list <- lapply(tile_files, terra::rast)
+ names(tiles_list) <- sprintf("tile_%03d", sort(tile_numbers))
+
+ safe_log(paste("Loaded", length(tiles_list), "tile mosaics for week", week))
+
+ return(tiles_list)
+}
diff --git a/r_app/mosaic_creation_utils.R b/r_app/mosaic_creation_utils.R
index d7c0e58..a0686b2 100644
--- a/r_app/mosaic_creation_utils.R
+++ b/r_app/mosaic_creation_utils.R
@@ -89,7 +89,7 @@ create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir,
safe_log("VRT list created, assessing cloud cover for mosaic creation")
# Calculate aggregated cloud cover statistics (returns data frame for image selection)
- cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir)
+ cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries)
# Create mosaic based on cloud cover assessment
mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries)
@@ -147,9 +147,10 @@ find_vrt_files <- function(vrt_directory, dates) {
#'
#' @param vrt_list List of VRT file paths (used to extract dates for TIF file lookup)
#' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif)
+#' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only
#' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection)
#'
-count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL) {
+count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) {
if (length(vrt_list) == 0) {
warning("No VRT files provided for cloud coverage calculation")
return(NULL)
@@ -187,9 +188,34 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL) {
# Extract the CI band (last band)
ci_band <- current_raster[[terra::nlyr(current_raster)]]
- # Count notNA pixels across entire raster
- total_notna <- terra::global(ci_band, fun = "notNA")$notNA
- total_pixels <- terra::ncell(ci_band)
+ # Create a unique field mask for THIS raster's extent
+ # This handles cases where rasters have different extents due to missing data
+ total_notna <- NA_real_
+ total_pixels <- NA_real_
+
+ if (!is.null(field_boundaries)) {
+ tryCatch({
+ # Create mask specific to this raster's grid
+ field_mask <- terra::rasterize(field_boundaries, ci_band, field = 1)
+
+ # Count pixels within field boundaries (for this specific raster)
+ total_pixels <- terra::global(field_mask, fun = "notNA")$notNA
+
+ # Cloud coverage calculated only over field areas
+ ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA)
+ total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA
+
+ }, error = function(e) {
+ # If field mask creation fails, fall back to entire raster
+ safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING")
+ })
+ }
+
+ # If field mask failed, use entire raster
+ if (is.na(total_notna)) {
+ total_notna <- terra::global(ci_band, fun = "notNA")$notNA
+ total_pixels <- terra::ncell(ci_band)
+ }
# Calculate cloud coverage percentage (missing = clouds)
missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
@@ -296,15 +322,25 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU
}
# Get filenames of best-coverage images
- # Match by finding filenames that match the dates in cloud_coverage_stats
+ # Match by extracting tile ID from both cloud stats and TIF filenames
rasters_to_use <- character()
for (idx in best_coverage) {
- # Extract date from cloud_coverage_stats filename
+ # Extract tile ID from cloud_coverage_stats filename (e.g., "tile_18" → 18)
cc_filename <- cloud_coverage_stats$filename[idx]
- # Find matching TIF file
- matching_tif <- tif_files[grepl(basename(cc_filename), basename(tif_files), fixed = TRUE)]
- if (length(matching_tif) > 0) {
- rasters_to_use <- c(rasters_to_use, matching_tif[1])
+ cc_tile_id <- gsub(".*_([0-9]+).*", "\\1", cc_filename)
+
+ # Find matching TIF file by matching tile ID
+ matching_tif <- NULL
+ for (tif_file in tif_files) {
+ tif_tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", basename(tif_file))
+ if (tif_tile_id == cc_tile_id) {
+ matching_tif <- tif_file
+ break
+ }
+ }
+
+ if (!is.null(matching_tif)) {
+ rasters_to_use <- c(rasters_to_use, matching_tif)
}
}
@@ -321,24 +357,105 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU
safe_log(paste("Using single image for mosaic:", basename(rasters_to_use)))
mosaic <- terra::rast(rasters_to_use[1])
} else {
- # Multiple files - create mosaic using max function
+ # Multiple files - merge handles different extents/grids automatically
safe_log(paste("Creating mosaic from", length(rasters_to_use), "images"))
- rsrc <- terra::sprc(rasters_to_use)
- mosaic <- terra::mosaic(rsrc, fun = "max")
- }
+
+ # Load all rasters with error handling - only keep successful loads
+ all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) {
+ tryCatch({
+ terra::rast(f)
+ }, error = function(e) {
+ safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING")
+ NULL # Return NULL on error, will be filtered out
+ })
+ }))
+
+ # Check what we loaded
+ safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files"))
+
+ if (length(all_rasters) == 0) {
+ safe_log("No valid rasters to merge", "WARNING")
+ return(NULL)
+ }
+
+ # Merge all rasters (handles different extents and grids automatically)
+ if (length(all_rasters) == 1) {
+ mosaic <- all_rasters[[1]]
+ safe_log("Using single raster after filtering")
+ } else {
+ # Create max composite: take maximum value at each pixel across all dates
+ # This skips clouds (low/zero CI values) in favor of clear pixels from other dates
+ mosaic <- tryCatch({
+ safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds"))
+
+ # Get extent from field boundaries if available, otherwise use raster intersection
+ if (!is.null(field_boundaries)) {
+ crop_extent <- terra::ext(field_boundaries)
+ safe_log("Using field boundaries extent for consistent area across all dates")
+ } else {
+ # Fallback: use intersection of all raster extents
+ crop_extent <- terra::ext(all_rasters[[1]])
+ for (i in 2:length(all_rasters)) {
+ crop_extent <- terra::intersect(crop_extent, terra::ext(all_rasters[[i]]))
+ }
+ safe_log("Using raster intersection extent")
+ }
+
+ # Crop all rasters to common extent
+ cropped_rasters <- lapply(all_rasters, function(r) {
+ terra::crop(r, crop_extent)
+ })
+
+ # Resample all cropped rasters to match the first one's grid
+ # This handles pixel grid misalignment from Python's dynamic extent adjustment
+ reference_grid <- cropped_rasters[[1]]
+ safe_log("Resampling rasters to common grid for stacking")
+
+ aligned_rasters <- lapply(cropped_rasters, function(r) {
+ if (identical(terra::ext(r), terra::ext(reference_grid)) &&
+ identical(terra::res(r), terra::res(reference_grid))) {
+ return(r) # Already aligned
+ }
+ terra::resample(r, reference_grid, method = "near")
+ })
+
+ # Create max composite using mosaic on aligned rasters
+ # Resample ensures all rasters have matching grids (no resolution mismatch)
+ raster_collection <- terra::sprc(aligned_rasters)
+ max_mosaic <- terra::mosaic(raster_collection, fun = "max")
+
+ max_mosaic
+ }, error = function(e) {
+ safe_log(paste("Max composite creation failed:", e$message), "WARNING")
+ safe_log("Using first raster only as fallback")
+ all_rasters[[1]]
+ })
+ safe_log(paste("Max composite created - taking clearest pixel at each location"))
+ }
- # Ensure we have exactly 5 bands (R, G, B, NIR, CI)
- if (terra::nlyr(mosaic) != 5) {
- safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5"), "WARNING")
- if (terra::nlyr(mosaic) > 5) {
- # Keep only first 5 bands
- mosaic <- terra::subset(mosaic, 1:5)
- safe_log("Keeping only first 5 bands")
+ # Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI
+ required_bands <- c("Red", "Green", "Blue", "NIR", "CI")
+ available_bands <- names(mosaic)
+
+ # Check if all required bands are present
+ if (!all(required_bands %in% available_bands)) {
+ safe_log(paste("Warning: Not all required bands found. Available:", paste(available_bands, collapse = ", ")), "WARNING")
+ }
+
+ # Select only the required bands in the correct order
+ if (all(required_bands %in% available_bands)) {
+ mosaic <- mosaic[[required_bands]]
+ safe_log("Selected Red, Green, Blue, NIR, CI bands")
+ } else {
+ safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5 (R, G, B, NIR, CI)"), "WARNING")
+ if (terra::nlyr(mosaic) > 5) {
+ # Keep only first 5 bands as fallback
+ mosaic <- terra::subset(mosaic, 1:5)
+ safe_log("Keeping only first 5 bands as fallback")
+ }
}
}
-
-
# Crop/mask to field boundaries if provided
if (!is.null(field_boundaries)) {
tryCatch({
@@ -432,3 +549,218 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS
return(file_path)
}
+
+#' Create weekly mosaic from pre-split tiles with MAX aggregation
+#'
+#' This function processes tiles created by Script 01 and processed by Script 02.
+#' For each of the 25 tiles independently:
+#' 1. Collects that tile from all dates in the range
+#' 2. Calculates cloud coverage per date
+#' 3. Uses create_mosaic logic to select best dates (cloud-clean preferred)
+#' 4. Creates MAX composite for that tile
+#' 5. Saves to tile_output_dir as week_WW_YYYY_TT.tif
+#'
+#' Input: merged_final_tif/[DATE]/[TILE_01.tif, TILE_02.tif, ..., TILE_25.tif]
+#' Output: weekly_tile_max/week_WW_YYYY_TT.tif (one weekly MAX file per tile ID, e.g., week_02_2026_01.tif)
+#'
+#' @param dates List from date_list() containing days_filter vector
+#' @param merged_final_dir Directory containing processed tiles (merged_final_tif)
+#' @param tile_output_dir Directory to save weekly MAX tiles (weekly_tile_max)
+#' @param field_boundaries Field boundaries for cloud coverage calculation (optional)
+#' @return List of paths to created tile files
+#'
+create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) {
+
+ safe_log("Starting per-tile mosaic creation with cloud-based date selection...")
+
+ # Create output directory if needed
+ dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE)
+
+ # Step 1: Discover all tiles from all dates and group by tile ID
+ tile_groups <- list() # Structure: tile_groups$tile_01 = list of files for that tile across dates
+
+ for (date in dates$days_filter) {
+ date_dir <- file.path(merged_final_dir, date)
+
+ if (!dir.exists(date_dir)) {
+ safe_log(paste(" Skipping date:", date, "- directory not found"), "WARNING")
+ next
+ }
+
+ tile_files <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE)
+
+ if (length(tile_files) == 0) {
+ safe_log(paste(" No tiles found for date:", date), "WARNING")
+ next
+ }
+
+ # Extract tile ID from each filename (e.g., "2026-01-02_01.tif" → tile ID is "01")
+ for (tile_file in tile_files) {
+ # Extract tile number from filename
+ tile_basename <- basename(tile_file)
+ tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", tile_basename)
+
+ if (!tile_id %in% names(tile_groups)) {
+ tile_groups[[tile_id]] <- list()
+ }
+ tile_groups[[tile_id]][[length(tile_groups[[tile_id]]) + 1]] <- tile_file
+ }
+ }
+
+ if (length(tile_groups) == 0) {
+ stop("No tiles found in date range")
+ }
+
+ safe_log(paste("Found", length(tile_groups), "unique tiles across all dates"))
+
+ # Step 2: Process each tile independently
+ created_tiles <- character()
+
+ for (tile_id in names(tile_groups)) {
+ tile_files_for_this_id <- unlist(tile_groups[[tile_id]])
+
+ safe_log(paste("Processing tile", tile_id, "with", length(tile_files_for_this_id), "dates"))
+
+ # Step 2a: Calculate cloud coverage for this tile across all dates
+ cloud_stats_this_tile <- tryCatch({
+ count_cloud_coverage_for_tile(
+ tile_files = tile_files_for_this_id,
+ field_boundaries = field_boundaries
+ )
+ }, error = function(e) {
+ safe_log(paste(" Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING")
+ NULL
+ })
+
+ if (is.null(cloud_stats_this_tile) || nrow(cloud_stats_this_tile) == 0) {
+ safe_log(paste(" No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING")
+ cloud_stats_this_tile <- NULL
+ }
+
+ # Step 2b: Create MAX mosaic for this tile using create_mosaic logic
+ tile_mosaic <- tryCatch({
+ create_mosaic(
+ tif_files = tile_files_for_this_id,
+ cloud_coverage_stats = cloud_stats_this_tile,
+ field_boundaries = NULL # Don't crop individual tiles
+ )
+ }, error = function(e) {
+ safe_log(paste(" Error creating mosaic for tile", tile_id, "-", e$message), "WARNING")
+ NULL
+ })
+
+ if (is.null(tile_mosaic)) {
+ safe_log(paste(" Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING")
+ next
+ }
+
+ # Step 2c: Save this tile's weekly MAX mosaic
+ # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif for week 2, 2026, tile 1)
+ tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_",
+ sprintf("%02d", as.integer(tile_id)), ".tif")
+ tile_output_path <- file.path(tile_output_dir, tile_filename)
+
+ tryCatch({
+ terra::writeRaster(tile_mosaic, tile_output_path, overwrite = TRUE)
+ safe_log(paste(" ✓ Saved tile", tile_id, "to:", tile_filename))
+ created_tiles <- c(created_tiles, tile_output_path)
+ }, error = function(e) {
+ safe_log(paste(" Error saving tile", tile_id, "-", e$message), "WARNING")
+ })
+ }
+
+ safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir))
+
+ return(created_tiles)
+}
+
+#' Calculate cloud coverage for a single tile across multiple dates
+#'
+#' Helper function for per-tile cloud assessment.
+#' Takes tile files from different dates and calculates cloud coverage for each.
+#' Cloud coverage is calculated ONLY within field boundaries, so total_pixels
+#' varies per tile based on which fields are present in that tile area.
+#'
+#' @param tile_files Character vector of tile file paths from different dates
+#' @param field_boundaries Field boundaries for analysis (optional; when NULL, cloud coverage is computed over the entire tile instead of per-field areas)
+#' @return Data frame with cloud stats for each date/tile
+#'
+count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) {
+ if (length(tile_files) == 0) {
+ warning("No tile files provided for cloud coverage calculation")
+ return(NULL)
+ }
+
+ aggregated_results <- list()
+
+ for (idx in seq_along(tile_files)) {
+ tile_file <- tile_files[idx]
+
+ tryCatch({
+ # Load the tile
+ current_raster <- terra::rast(tile_file)
+
+ # Extract the CI band (last band in 5-band output)
+ ci_band <- current_raster[[terra::nlyr(current_raster)]]
+
+ # Calculate cloud coverage within field boundaries
+ if (!is.null(field_boundaries)) {
+ # Create a reference raster template (same extent/resolution as ci_band, but independent of its data)
+ # This ensures field_mask shows the potential field area even if ci_band is entirely cloudy (all NA)
+ ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band),
+ crs = terra::crs(ci_band))
+ terra::values(ref_template) <- 1 # Fill entire raster with 1s
+
+ # Crop and mask to field boundaries: keeps 1 inside fields, NA outside
+ # This is independent of ci_band's data - represents the potential field area
+ field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE)
+
+ # Count total potential field pixels from the mask (independent of clouds)
+ total_pixels <- terra::global(field_mask, fun = "notNA")$notNA
+
+ # Now crop and mask CI band to field boundaries to count actual valid (non-cloud) pixels
+ ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE)
+ total_notna <- terra::global(ci_masked, fun = "notNA")$notNA
+ } else {
+ # If no field boundaries provided, use entire tile
+ total_notna <- terra::global(ci_band, fun = "notNA")$notNA
+ total_pixels <- terra::ncell(ci_band)
+ }
+
+ # Cloud coverage percentage (missing = clouds)
+ missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
+
+ aggregated_results[[idx]] <- data.frame(
+ filename = paste0("tile_", sprintf("%02d", as.integer(gsub(".*_([0-9]+)\\.tif", "\\1", basename(tile_file))))),
+ notNA = total_notna,
+ total_pixels = total_pixels,
+ missing_pixels_percentage = missing_pct,
+ thres_5perc = as.integer(missing_pct < 5),
+ thres_40perc = as.integer(missing_pct < 45),
+ stringsAsFactors = FALSE
+ )
+
+ }, error = function(e) {
+ safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING")
+ aggregated_results[[idx]] <<- data.frame(
+ filename = tile_file,
+ notNA = NA_real_,
+ total_pixels = NA_real_,
+ missing_pixels_percentage = 100,
+ thres_5perc = 0,
+ thres_40perc = 0,
+ stringsAsFactors = FALSE
+ )
+ })
+ }
+
+ # Combine results
+ aggregated_df <- if (length(aggregated_results) > 0) {
+ do.call(rbind, aggregated_results)
+ } else {
+ data.frame()
+ }
+
+ return(aggregated_df)
+}
+
diff --git a/r_app/05_CI_report_dashboard_planet.Rmd b/r_app/old_scripts/05_CI_report_dashboard_planet.Rmd
similarity index 99%
rename from r_app/05_CI_report_dashboard_planet.Rmd
rename to r_app/old_scripts/05_CI_report_dashboard_planet.Rmd
index 557c7bd..fd4de8f 100644
--- a/r_app/05_CI_report_dashboard_planet.Rmd
+++ b/r_app/old_scripts/05_CI_report_dashboard_planet.Rmd
@@ -49,7 +49,6 @@ suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
- library(exactextractr)
library(tidyverse)
library(tmap)
library(lubridate)
diff --git a/r_app/06_crop_messaging b/r_app/old_scripts/06_crop_messaging
similarity index 100%
rename from r_app/06_crop_messaging
rename to r_app/old_scripts/06_crop_messaging
diff --git a/r_app/packages/CIprep_0.1.4.tar.gz b/r_app/packages/CIprep_0.1.4.tar.gz
deleted file mode 100755
index 07eda40..0000000
Binary files a/r_app/packages/CIprep_0.1.4.tar.gz and /dev/null differ
diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R
index 04f84ed..8a8f7f1 100644
--- a/r_app/parameters_project.R
+++ b/r_app/parameters_project.R
@@ -36,6 +36,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
final = here(laravel_storage_dir, "merged_final_tif")
),
weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"),
+ weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"),
extracted_ci = list(
base = here(laravel_storage_dir, "Data/extracted_ci"),
daily = here(laravel_storage_dir, "Data/extracted_ci/daily_vals"),
@@ -61,7 +62,8 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
daily_CI_vals_dir = dirs$extracted_ci$daily,
cumulative_CI_vals_dir = dirs$extracted_ci$cumulative,
weekly_CI_mosaic = dirs$weekly_mosaic,
- daily_vrt = dirs$tif$merged, # Point to actual merged TIF/VRT directory instead of Data/vrt
+ daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction
+ weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output)
harvest_dir = dirs$harvest,
extracted_CI_dir = dirs$extracted_ci$base
))
diff --git a/r_app/system_architecture/system_architecture.md b/r_app/system_architecture/system_architecture.md
index 9919e1a..a14e572 100644
--- a/r_app/system_architecture/system_architecture.md
+++ b/r_app/system_architecture/system_architecture.md
@@ -1,473 +1,501 @@
-# SmartCane System Architecture
+# SmartCane System Architecture - R Pipeline & File-Based Processing
## Overview
-The SmartCane system is a comprehensive agricultural intelligence platform that processes satellite imagery and farm data to provide agronomic insights for sugarcane farmers. The system architecture follows a modular, layered approach with clear separation of concerns between data acquisition, processing, and presentation.
+The SmartCane system is a file-based agricultural intelligence platform that processes satellite imagery through a multi-stage R-script pipeline. Raw satellite imagery flows through sequential processing steps (CI extraction, growth model interpolation, mosaic creation, KPI analysis) with outputs persisted as GeoTIFFs, RDS files, and Excel/Word reports.
-## Architectural Layers
+## Processing Pipeline Overview
-The SmartCane system follows a layered architecture pattern, which is a standard approach in software engineering for organizing complex systems. This architecture divides the system into distinct functional layers, each with specific responsibilities. While these layers aren't explicitly shown as separate visual elements in the diagrams, they help conceptualize how components are organized by their function:
-
-
-
-### 1. Data Acquisition Layer
-- **Role**: Responsible for fetching raw data from external sources and user inputs
-- **Components**: Manual Sentinel Hub Requests, Python API Downloader, User Input Interface
-- **Functions**: Manual request setup on Sentinel Hub Requests Builder for specific client fields, connects to satellite data providers, downloads imagery, manages API credentials, performs preliminary data validation
-
-### 2. Processing Layer (SmartCane Engine)
-- **Role**: Core analytical engine that transforms raw data into actionable insights
-- **Components**: Python API Downloader (pre-processing), R Processing Engine (analytics)
-- **Functions**: Image processing, cloud masking, crop index calculation, field boundary processing, statistical analysis, report generation
-
-### 3. Presentation Layer
-- **Role**: Delivers insights to end users in accessible formats
-- **Components**: Laravel Web App, Email Delivery System
-- **Functions**: Interactive dashboards, visualization, report delivery, user management, project scheduling
-
-### 4. Data Storage Layer
-- **Role**: Persists system data across processing cycles
-- **Components**: File System, Database
-- **Functions**: Stores raw imagery, processed rasters, analytical results, user data, configuration
-
-## Key Subsystems
-
-### 1. Python API Downloader
-- **Role**: Acquires and pre-processes satellite imagery
-- **Inputs**: API credentials, field boundaries, date parameters, evaluation scripts
-- **Outputs**: Raw satellite images, merged GeoTIFFs, virtual rasters
-- **Interfaces**: External satellite APIs (Planet via Sentinel Hub), file system
-- **Orchestration**: Triggered by shell scripts from the Laravel application
-
-### 2. R Processing Engine
-- **Role**: Performs advanced analytics and generates insights
-- **Inputs**: Processed satellite imagery, field boundaries, harvest data, project parameters
-- **Outputs**: Crop indices, mosaics, RDS data files, agronomic reports
-- **Interfaces**: File system, report templates
-- **Orchestration**: Triggered by shell scripts from the Laravel application
-
-### 3. Laravel Web Application
-- **Role**: Provides operator interface and orchestrates the overall system
-- **Inputs**: User data, configuration settings
-- **Outputs**: Web interface, scheduling, report delivery
-- **Interfaces**: Users, database, file system
-- **Orchestration**: Controls execution of the SmartCane Engine via shell scripts
-
-### 4. Shell Script Orchestration
-- **Role**: Bridges between web application and processing components
-- **Functions**: Triggers processing workflows, manages execution environment, handles errors
-- **Examples**: runcane.sh, runpython.sh, build_mosaic.sh, build_report.sh
-
-## Data Flow
-
-### Overview
-The SmartCane system processes data through a series of well-defined stages, each building upon the previous stage's outputs. Data flows from raw satellite imagery through multiple transformation and analysis steps before becoming actionable insights.
-
-### Detailed Data Flow by Stage
-
-#### 1. **Input Stage** - Data Acquisition Setup
- - **Actors**: System operators (internal team)
- - **Actions**:
- - Manually prepare and submit requests on Sentinel Hub Requests Builder for specific client fields
- - Upload farm data (field boundaries in GeoJSON, harvest data in Excel) via Laravel Web App
- - Configure project parameters (date ranges, project directory, analysis thresholds)
- - **Outputs**:
- - API credentials stored in environment variables
- - Field boundaries: `laravel_app/storage/app/{project}/Data/pivot.geojson` (or `pivot_2.geojson` for ESA)
- - Harvest data: `laravel_app/storage/app/{project}/Data/harvest.xlsx`
- - Project metadata stored in database
-
-#### 2. **Acquisition Stage** - Raw Satellite Data Download
- - **Trigger**: Laravel scheduler or manual execution via shell scripts (`runpython.sh`, `01_run_planet_download.sh`)
- - **Script**: `python_app/01_planet_download.py` (or `.ipynb`)
- - **Inputs**:
- - API credentials (`SH_CLIENT_ID`, `SH_CLIENT_SECRET`)
- - Collection ID for BYOC (Bring Your Own Collection)
- - Field boundaries GeoJSON
- - Date range parameters (`DATE`, `DAYS` environment variables)
- - Evalscript for band selection and cloud masking
- - **Process**:
- 1. Parse date range into daily slots
- 2. Load field boundaries and split into bounding boxes (5x5 grid)
- 3. Check image availability via Sentinel Hub Catalog API
- 4. Download images tile by tile for each date and bbox
- 5. Merge tiles into daily mosaics using GDAL
- 6. Clean up intermediate files
- - **Intermediate Data**:
- - Raw tiles: `single_images/{date}/response.tiff`
- - Virtual rasters: `merged_virtual/merged{date}.vrt`
- - **Outputs**:
- - Daily merged GeoTIFFs: `merged_tif/{date}.tif` (4 bands: Red, Green, Blue, NIR)
- - File naming convention: `YYYY-MM-DD.tif`
- - **Parameters Used**:
- - `resolution = 3` (meters per pixel)
- - `max_threads = 15` for download
- - Grid split: `(5, 5)` bounding boxes
-
-#### 3. **Processing Stage - Step 1: CI Extraction**
- - **Trigger**: Shell script `02_run_ci_extraction.sh`
- - **Script**: `r_app/02_ci_extraction.R` + `ci_extraction_utils.R`
- - **Inputs**:
- - Daily merged GeoTIFFs from acquisition stage
- - Field boundaries (loaded via `parameters_project.R`)
- - Command-line args: `end_date`, `offset` (days lookback), `project_dir`
- - **Process**:
- 1. Load configuration via `parameters_project.R`
- 2. Generate date list for processing week
- 3. For each daily image:
- - Calculate Canopy Index: `CI = NIR / Green - 1`
- - Crop to field boundaries
- - Mask invalid pixels (0 values → NA)
- - Create VRT for fast access
- 4. Extract CI statistics per field using `exactextractr`
- 5. Save daily extractions as RDS files
- 6. Combine daily extractions into cumulative dataset
- - **Intermediate Data**:
- - Processed daily rasters: `merged_final_tif/{date}.tif` (5 bands: R, G, B, NIR, CI)
- - Daily VRTs: `Data/vrt/{date}.vrt`
- - Daily extractions: `Data/extracted_ci/daily_vals/extracted_{date}_whole_field.rds`
- - **Outputs**:
- - Cumulative CI dataset: `Data/extracted_ci/cumulative_vals/combined_CI_data.rds`
- - Structure: `field`, `sub_field`, `{date1}`, `{date2}`, ... (wide format)
- - **Calculations**:
- - Canopy Index (CI) formula: `(NIR / Green) - 1`
- - Statistics per field: `mean`, `count`, `notNA`
-
-#### 4. **Processing Stage - Step 2: Growth Model Interpolation**
- - **Trigger**: Shell script `03_run_growth_model.sh`
- - **Script**: `r_app/03_interpolate_growth_model.R` + `growth_model_utils.R`
- - **Inputs**:
- - Combined CI data (from Step 1)
- - Harvest data with season dates
- - Command-line arg: `project_dir`
- - **Process**:
- 1. Load combined CI data and pivot to long format
- 2. For each field and season:
- - Filter CI measurements within season dates
- - Apply linear interpolation to fill daily gaps: `approxfun()`
- - Calculate daily CI and cumulative CI
- 3. Combine all fields and seasons
- - **Outputs**:
- - Interpolated growth model: `Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds`
- - Structure: `Date`, `DOY`, `FitData`, `value`, `CI_per_day`, `cumulative_CI`, `model`, `season`, `subField`, `field`
- - **Calculations**:
- - `CI_per_day` = today's CI - yesterday's CI
- - `cumulative_CI` = cumulative sum of daily CI values
- - `DOY` (Day of Year) = sequential days from season start
-
-#### 5. **Processing Stage - Step 3: Weekly Mosaic Creation**
- - **Trigger**: Shell script `04_run_mosaic_creation.sh`
- - **Script**: `r_app/04_mosaic_creation.R` + `mosaic_creation_utils.R`
- - **Inputs**:
- - Daily VRT files
- - Field boundaries
- - Command-line args: `end_date`, `offset`, `project_dir`, `file_name` (optional)
- - **Process**:
- 1. Find VRT files matching date range
- 2. Assess cloud coverage for each image:
- - Calculate % missing pixels per image
- - Classify as: <5% cloud (good), <45% cloud (acceptable), >45% (poor)
- 3. Select best images based on cloud coverage
- 4. Create composite using `max` function across selected images
- 5. Crop to field boundaries
- - **Outputs**:
- - Weekly mosaic: `weekly_mosaic/week_{WW}_{YYYY}.tif` (5 bands: R, G, B, NIR, CI)
- - ISO week numbering used (e.g., `week_41_2025.tif`)
- - **Parameters**:
- - `CLOUD_THRESHOLD_STRICT = 5%` - Preferred images
- - `CLOUD_THRESHOLD_RELAXED = 45%` - Acceptable images
- - Mosaic function: `max` (takes highest CI value across images)
-
-#### 6. **Processing Stage - Step 4: KPI Calculation**
- - **Trigger**: Shell script `09_run_calculate_kpis.sh`
- - **Script**: `r_app/09_calculate_kpis.R` + `kpi_utils.R`
- - **Inputs**:
- - Current week mosaic
- - Previous week mosaic
- - Field boundaries
- - Harvest data (for tonnage_ha)
- - Cumulative CI data (for yield prediction)
- - Command-line args: `end_date`, `offset`, `project_dir`
- - **Process** (6 KPIs calculated):
- 1. **Field Uniformity**: Calculate CV (coefficient of variation) per field
- 2. **Area Change**: Compare current vs previous week CI, classify as improving/stable/declining
- 3. **TCH Forecasted**: Train Random Forest model on historic yield data, predict for mature fields (≥240 days)
- 4. **Growth Decline**: Detect declining fields using CI change + spatial autocorrelation (Moran's I)
- 5. **Weed Presence**: Detect rapid CI increase (>2.0 units/week) in young fields (<240 days)
- 6. **Gap Filling**: Identify areas with low CI (lowest 25th percentile)
- - **Outputs**:
- - KPI results: `reports/kpis/kpi_results_week{WW}.rds`
- - Summary tables: `reports/kpis/{project}_kpi_summary_tables_week{WW}.rds`
- - Field details: `reports/kpis/{project}_field_details_week{WW}.rds`
- - CSV exports: `reports/kpis/csv/*.csv`
- - **Calculations & Thresholds**:
- - CV thresholds: <0.15 (Excellent), <0.25 (Good), <0.35 (Moderate), ≥0.35 (Poor)
- - Change threshold: ±0.5 CI units
- - Weed threshold: >2.0 CI units/week increase + <10% area (Low), <25% (Moderate), ≥25% (High)
- - Decline risk: Combines CI decrease severity with spatial heterogeneity
- - Canopy closure age: 240 days (8 months)
-
-#### 7. **Processing Stage - Step 5: Report Generation**
- - **Trigger**: Shell script `10_run_kpi_report.sh`
- - **Script**: `r_app/10_CI_report_with_kpis_simple.Rmd` (R Markdown)
- - **Inputs**:
- - Weekly mosaics (current and previous)
- - KPI results from Step 4
- - Field boundaries
- - Project parameters
- - **Process**:
- 1. Load weekly mosaics and KPI data
- 2. Generate visualizations:
- - RGB field maps
- - CI heatmaps
- - Change detection maps
- - KPI summary charts
- 3. Create field-by-field analysis pages
- 4. Compile into Word document
- - **Outputs**:
- - Executive report: `reports/SmartCane_Report_week{WW}_{YYYY}.docx`
- - HTML version (optional): `reports/SmartCane_Report_week{WW}_{YYYY}.html`
-
-#### 8. **Output Stage** - Insight Delivery
- - **Actors**: Laravel Web App, Email system
- - **Actions**:
- - Laravel accesses generated reports from file system
- - Reports are attached to emails
- - Emails sent to configured recipients
- - (Future) Reports displayed in web dashboard
- - **Outputs**:
- - Email reports delivered to farm managers
- - PDF/DOCX attachments with weekly analysis
- - (Future) Interactive web dashboard
-
-### Data Persistence and Storage
-
-#### File System Structure
```
-laravel_app/storage/app/{project}/
-├── Data/
-│ ├── pivot.geojson (or pivot_2.geojson) # Field boundaries
-│ ├── harvest.xlsx # Historic yield data
-│ ├── vrt/ # Virtual rasters (daily)
-│ └── extracted_ci/
-│ ├── daily_vals/ # Daily CI extractions (RDS)
-│ └── cumulative_vals/
-│ ├── combined_CI_data.rds # Cumulative wide-format CI
-│ └── All_pivots_Cumulative_CI_quadrant_year_v2.rds # Interpolated growth model
-├── merged_tif/ # Raw daily downloads (4 bands)
-├── merged_final_tif/ # Processed daily CI rasters (5 bands)
-├── weekly_mosaic/ # Weekly composite mosaics
-│ └── week_{WW}_{YYYY}.tif
-├── reports/
-│ ├── kpis/ # KPI calculation results
-│ │ ├── kpi_results_week{WW}.rds
-│ │ ├── csv/ # CSV exports
-│ │ └── field_level/ # Per-field KPI data
-│ ├── SmartCane_Report_week{WW}_{YYYY}.docx # Final reports
-│ └── SmartCane_Report_week{WW}_{YYYY}.html
-└── logs/
- └── {YYYYMMDD}.log # Execution logs
+Python Download → Daily GeoTIFFs → CI Extraction (RDS) → Growth Model (RDS) → Weekly Mosaics (TIF)
+ ↓ ↓
+ Cumulative CI Data ←─────────────────── KPI Calculation
+ ↓
+ Field Analysis & Report Generation
+ ↓
+ Excel + Word Outputs
```
-#### Database Storage
-- **Projects table**: Project metadata, client info, scheduling
-- **Users table**: Operator and client accounts
-- **Execution logs**: Script run history, success/failure tracking
-- **Email queue**: Pending report deliveries
-
-### Key Parameters by Stage
-
-| Stage | Parameter | Source | Value/Default |
-|-------|-----------|--------|---------------|
-| **Download** | Resolution | Hardcoded | 3 meters |
-| | Days lookback | Env var `DAYS` | 7-28 |
-| | Bbox split | Hardcoded | (5, 5) |
-| | Max threads | Hardcoded | 15 |
-| **CI Extraction** | Offset | Command-line | 7 days |
-| | Min valid pixels | Hardcoded | 100 |
-| | CI formula | Hardcoded | (NIR/Green) - 1 |
-| **Mosaic** | Cloud threshold strict | Hardcoded | 5% |
-| | Cloud threshold relaxed | Hardcoded | 45% |
-| | Composite function | Hardcoded | max |
-| | Week system | Hardcoded | ISO 8601 |
-| **KPI** | CV Excellent | Hardcoded | <0.15 |
-| | CV Good | Hardcoded | <0.25 |
-| | Weed threshold | Hardcoded | 2.0 CI/week |
-| | Canopy closure | Hardcoded | 240 days |
-| | Change threshold | Hardcoded | ±0.5 CI |
-
-### Intermediate Data Transformations
-
-1. **4-band to 5-band raster**: Raw download (R,G,B,NIR) → Processed (R,G,B,NIR,CI)
-2. **Wide to long CI data**: Combined CI data (wide by date) → Long format (Date, field, value)
-3. **Point measurements to continuous**: Sparse CI observations → Daily interpolated values
-4. **Daily to weekly**: Multiple daily images → Single weekly composite
-5. **Raster to statistics**: Spatial CI values → Per-field summary metrics
-6. **Field-level to farm-level**: Individual field KPIs → Aggregated farm summaries
-
-## System Integration Points
-
-- **Python-R Integration**: Data handover via file system (GeoTIFF, virtual rasters)
-- **Engine-Laravel Integration**: Orchestration via shell scripts, data exchange via file system and database
-- **User-System Integration**: Web interface, file uploads, email notifications
-
-## Developed/Customized Elements
-
-- **Custom Cloud Masking Algorithm**: Specialized for agricultural applications in tropical regions
-- **Crop Index Extraction Pipeline**: Tailored to sugarcane spectral characteristics
-- **Reporting Templates**: Designed for agronomic decision support
-- **Shell Script Orchestration**: Custom workflow management for the system's components
-
-## Strategic Role of Satellite Data
-
-Satellite data is central to the SmartCane system, providing:
-- Regular, non-invasive field monitoring
-- Detection of spatial patterns not visible from ground level
-- Historical analysis of crop performance
-- Early warning of crop stress or disease
-- Quantification of field variability for precision agriculture
-
-## Pilot Utilization Sites
-
-The SmartCane system is currently operational in Mozambique, Kenya, and Tanzania. Future pilot deployments and expansions are planned for Uganda, Colombia, Mexico, Guatemala, South Africa, and Zambia.
-
----
-
-## System Architecture Diagrams
-
-Below are diagrams illustrating the system architecture from different perspectives.
-
-### Detailed Data Flow and Processing Pipeline
-
-This comprehensive diagram shows the complete data processing pipeline from raw satellite data acquisition through to final report generation. It illustrates all major processing steps, intermediate data products, key parameters, and decision points in the SmartCane system.
+## Complete Processing Pipeline (Mermaid Diagram)
```mermaid
graph TD
%% ===== INPUTS =====
- subgraph INPUTS["📥 INPUTS"]
- API["fa:fa-key API Credentials
(SH_CLIENT_ID, SH_CLIENT_SECRET)"]
- FieldBoundaries["fa:fa-map Field Boundaries
pivot.geojson / pivot_2.geojson"]
- HarvestData["fa:fa-table Harvest Data
harvest.xlsx
(season dates, tonnage_ha)"]
- DateParams["fa:fa-calendar Date Parameters
(end_date, offset, days)"]
- ProjectConfig["fa:fa-cog Project Config
(project_dir, resolution=3m)"]
- end
-
+ API["🔑 API Credentials"]
+ Bounds["🗺️ Field Boundaries
(pivot.geojson)"]
+ Harvest["📊 Harvest Data
(harvest.xlsx)"]
+
%% ===== STAGE 1: DOWNLOAD =====
- subgraph DOWNLOAD["🛰️ STAGE 1: SATELLITE DATA DOWNLOAD"]
- Download["fa:fa-download 01_planet_download.py
───────────────
• Parse date range into slots
• Split field into 5x5 bboxes
• Check image availability
• Download tiles (4 bands)
• Merge with GDAL"]
-
- DownloadParams["📊 Parameters:
• resolution = 3m
• max_threads = 15
• bbox_split = (5,5)
• bands = [R,G,B,NIR]"]
-
- RawTiles["💾 Intermediate:
single_images/{date}/
response.tiff"]
-
- DailyMosaics["📦 OUTPUT:
merged_tif/{date}.tif
───────────────
4 bands: Red, Green,
Blue, NIR
3m resolution"]
- end
-
+ Download["STAGE 1: Satellite Download
01_planet_download.py"]
+ DL_Out["📦 OUTPUT
merged_tif/{date}.tif
(4 bands: RGBN)"]
+
%% ===== STAGE 2: CI EXTRACTION =====
- subgraph CI_EXTRACTION["🔬 STAGE 2: CI EXTRACTION"]
- CIScript["fa:fa-calculator 02_ci_extraction.R
───────────────
• Calculate CI = NIR/Green - 1
• Crop to field boundaries
• Mask invalid pixels
• Extract stats per field"]
-
- CIParams["📊 Parameters:
• offset = 7 days
• min_valid_pixels = 100
• mask zeros as NA"]
-
- ProcessedRasters["💾 Intermediate:
merged_final_tif/{date}.tif
5 bands + VRT files"]
-
- DailyExtract["💾 Intermediate:
extracted_ci/daily_vals/
extracted_{date}.rds
(field, sub_field, mean_CI)"]
-
- CombinedCI["📦 OUTPUT:
combined_CI_data.rds
───────────────
Wide format:
(field, date1, date2, ...)"]
- end
-
+ CI["STAGE 2: CI Extraction
02_ci_extraction.R"]
+ CI_Utils["[Utility]
ci_extraction_utils.R"]
+ CI_Out["📦 OUTPUT
combined_CI_data.rds
(wide format)"]
+
%% ===== STAGE 3: GROWTH MODEL =====
- subgraph GROWTH_MODEL["📈 STAGE 3: GROWTH MODEL INTERPOLATION"]
- GrowthScript["fa:fa-chart-line 03_interpolate_growth_model.R
───────────────
• Pivot CI to long format
• Filter by season dates
• Linear interpolation
• Calculate daily & cumulative CI"]
-
- GrowthParams["📊 Calculations:
• approxfun() interpolation
• CI_per_day = diff(CI)
• cumulative_CI = cumsum(CI)
• DOY = days from season start"]
-
- InterpolatedModel["📦 OUTPUT:
All_pivots_Cumulative_CI
_quadrant_year_v2.rds
───────────────
(Date, DOY, FitData,
CI_per_day, cumulative_CI,
field, season)"]
- end
-
+ Growth["STAGE 3: Growth Model
03_interpolate_growth_model.R"]
+ Growth_Utils["[Utility]
growth_model_utils.R"]
+ Growth_Out["📦 OUTPUT
All_pivots_Cumulative_CI
_quadrant_year_v2.rds"]
+
%% ===== STAGE 4: WEEKLY MOSAIC =====
- subgraph WEEKLY_MOSAIC["🗺️ STAGE 4: WEEKLY MOSAIC CREATION"]
- MosaicScript["fa:fa-images 04_mosaic_creation.R
───────────────
• Find VRTs for date range
• Assess cloud coverage
• Select best images
• Composite with max()"]
-
- CloudAssess["🔍 Cloud Assessment:
• <5% cloud → excellent
• <45% cloud → acceptable
• >45% cloud → poor"]
-
- MosaicParams["📊 Parameters:
• ISO week numbering
• composite = max
• crop to boundaries"]
-
- WeeklyMosaic["📦 OUTPUT:
weekly_mosaic/
week_{WW}_{YYYY}.tif
───────────────
5 bands: R,G,B,NIR,CI
Cloud-free composite"]
- end
-
- %% ===== STAGE 5: KPI CALCULATION =====
- subgraph KPI_CALC["📊 STAGE 5: KPI CALCULATION (6 KPIs)"]
- KPIScript["fa:fa-calculator-alt 09_calculate_kpis.R
───────────────
Calculates 6 KPIs from
current + previous week"]
-
- KPI1["1️⃣ Field Uniformity
• CV = SD / mean
• <0.15 = Excellent
• <0.25 = Good"]
-
- KPI2["2️⃣ Area Change
• Compare week over week
• Classify improving/
stable/declining areas"]
-
- KPI3["3️⃣ TCH Forecast
• Random Forest model
• Predict yield for fields
≥240 days old"]
-
- KPI4["4️⃣ Growth Decline
• CI decrease + spatial
autocorrelation (Moran's I)
• Risk scoring"]
-
- KPI5["5️⃣ Weed Presence
• Rapid CI increase
(>2.0 units/week)
• Only fields <240 days"]
-
- KPI6["6️⃣ Gap Filling
• Identify lowest 25%
CI areas
• Gap severity score"]
-
- KPIParams["📊 Thresholds:
• CV: 0.15, 0.25, 0.35
• Change: ±0.5 CI
• Weed: 2.0 CI/week
• Canopy: 240 days
• Moran's I: 0.7-0.95"]
-
- KPIResults["📦 OUTPUT:
kpi_results_week{WW}.rds
───────────────
• Summary tables
• Field-level details
• CSV exports"]
- end
-
- %% ===== STAGE 6: REPORTING =====
- subgraph REPORTING["📄 STAGE 6: REPORT GENERATION"]
- ReportScript["fa:fa-file-word 10_CI_report_with_kpis_simple.Rmd
───────────────
• Load mosaics & KPIs
• Generate visualizations
• Field-by-field analysis
• Compile to Word/HTML"]
-
- Visualizations["🎨 Visualizations:
• RGB field maps
• CI heatmaps
• Change detection
• KPI charts"]
-
- FinalReport["📦 OUTPUT:
SmartCane_Report
_week{WW}_{YYYY}.docx
───────────────
• Executive summary
• Farm-wide KPIs
• Field analysis pages
• Recommendations"]
- end
-
- %% ===== OUTPUTS =====
- subgraph OUTPUTS["📤 OUTPUTS & DELIVERY"]
- EmailDelivery["fa:fa-envelope Email Delivery
───────────────
Reports sent to
farm managers"]
-
- WebDashboard["fa:fa-desktop Web Dashboard
(Future)
───────────────
Interactive field
monitoring"]
- end
-
- %% ===== ORCHESTRATION =====
- Laravel["fa:fa-server Laravel Web App
───────────────
Schedules & triggers
processing pipeline"]
+ Mosaic["STAGE 4: Weekly Mosaic
04_mosaic_creation.R"]
+ Mosaic_Utils["[Utility]
mosaic_creation_utils.R"]
+ Mosaic_Out["📦 OUTPUT
weekly_mosaic/week_{WW}.tif
(5 bands: RGBNCI)"]
- ShellScripts["fa:fa-terminal Shell Scripts
───────────────
01-10_run_*.sh
Execute each stage"]
-
- %% ===== DATA FLOW CONNECTIONS =====
+ %% ===== STAGE 5: FIELD ANALYSIS =====
+ Field["STAGE 5: Field Analysis
09_field_analysis_weekly.R
(or 09b parallel)"]
+ Field_Utils["[Utility]
field_analysis_utils.R"]
+ Field_Out1["📦 OUTPUT
{project}_field_analysis
_week{WW}.xlsx"]
+ Field_Out2["📦 OUTPUT
{project}_kpi_summary
_tables_week{WW}.rds"]
+
+ %% ===== STAGE 6: REPORT =====
+ Report["STAGE 6: Report Generation
10_CI_report_with_kpis_simple.Rmd"]
+ Report_Utils["[Utility]
report_utils.R"]
+ Report_Out1["📦 PRIMARY OUTPUT
SmartCane_Report
_week{WW}_{YYYY}.docx"]
+ Report_Out2["📦 ALTERNATIVE
SmartCane_Report
_week{WW}_{YYYY}.html"]
+
+ %% ===== CONFIG =====
+ Config["[Utility]
parameters_project.R"]
+
+ %% ===== CONNECTIONS =====
API --> Download
- FieldBoundaries --> Download
- DateParams --> Download
- ProjectConfig --> Download
+ Bounds --> Download
+ Download --> DL_Out
- Download --> DownloadParams
- DownloadParams --> RawTiles
- RawTiles --> DailyMosaics
+ DL_Out --> CI
+ Bounds --> CI
+ Config --> CI
+ CI --> CI_Utils
+ CI --> CI_Out
- DailyMosaics --> CIScript
- FieldBoundaries --> CIScript
- CIScript --> CIParams
- CIParams --> ProcessedRasters
- ProcessedRasters --> DailyExtract
- DailyExtract --> CombinedCI
+ CI_Out --> Growth
+ Harvest --> Growth
+ Config --> Growth
+ Growth --> Growth_Utils
+ Growth --> Growth_Out
- CombinedCI --> GrowthScript
- HarvestData --> GrowthScript
- GrowthScript --> GrowthParams
- GrowthParams --> InterpolatedModel
+ DL_Out --> Mosaic
+ Bounds --> Mosaic
+ Config --> Mosaic
+ Mosaic --> Mosaic_Utils
+ Mosaic --> Mosaic_Out
- ProcessedRasters --> MosaicScript
- FieldBoundaries --> MosaicScript
- MosaicScript --> CloudAssess
- CloudAssess --> MosaicParams
- MosaicParams --> WeeklyMosaic
+ Mosaic_Out --> Field
+ Growth_Out --> Field
+ Bounds --> Field
+ Harvest --> Field
+ Config --> Field
+ Field --> Field_Utils
+ Field --> Field_Out1
+ Field --> Field_Out2
- WeeklyMosaic --> KPIScript
+ Mosaic_Out --> Report
+ Field_Out2 --> Report
+ Field_Out1 --> Report
+ Config --> Report
+ Report --> Report_Utils
+ Report --> Report_Out1
+ Report --> Report_Out2
+
+ %% ===== STYLING =====
+ classDef input fill:#e1f5ff,stroke:#01579b,stroke-width:2px
+ classDef stage fill:#f3e5f5,stroke:#4a148c,stroke-width:2px
+ classDef output fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px
+ classDef util fill:#fff3e0,stroke:#e65100,stroke-width:2px
+
+ class API,Bounds,Harvest,Config input
+ class Download,CI,Growth,Mosaic,Field,Report stage
+ class DL_Out,CI_Out,Growth_Out,Mosaic_Out,Field_Out1,Field_Out2,Report_Out1,Report_Out2 output
+ class CI_Utils,Growth_Utils,Mosaic_Utils,Field_Utils,Report_Utils util
+```
+
+## Data Processing Pipeline
+
+### Stage 1: Satellite Data Acquisition (Python)
+- **Script**: `python_app/01_planet_download.py`
+- **Inputs**: API credentials, field boundaries (GeoJSON), date range
+- **Outputs**: Daily merged GeoTIFFs
+- **File Location**: `laravel_app/storage/app/{project}/merged_tif/`
+- **File Format**: `YYYY-MM-DD.tif` (4 bands: Red, Green, Blue, NIR, uint16)
+- **Processing**:
+ - Downloads from Sentinel Hub BYOC collection
+ - Applies cloud masking (UDM1 band)
+ - Merges tiles into daily mosaics
+ - Stores at 3m resolution
+
+### Stage 2: Canopy Index (CI) Extraction
+- **Script**: `r_app/02_ci_extraction.R`
+- **Utility Functions**: `ci_extraction_utils.R` (handles tile detection, RDS I/O)
+- **Inputs**: Daily GeoTIFFs, field boundaries (GeoJSON)
+- **Outputs**:
+ - Daily extractions (RDS): `Data/extracted_ci/daily_vals/extracted_{date}_{suffix}.rds`
+ - Cumulative dataset (RDS): `Data/extracted_ci/cumulative_vals/combined_CI_data.rds`
+- **File Format**:
+ - Daily: Per-field statistics (mean CI, count, notNA pixels)
+ - Cumulative: Wide format with fields as rows, dates as columns
+- **Processing**:
+ - Calculates CI = (NIR / Green) - 1
+ - Extracts stats per field using field geometry
+ - Handles missing pixels (clouds → NA values)
+ - Supports both full rasters and tile-based extraction
+- **Key Parameters**:
+ - CI formula: `(NIR / Green) - 1`
+ - Min valid pixels: 100 per field
+ - Cloud masking: UDM1 != 0
+
+### Stage 3: Growth Model Interpolation
+- **Script**: `r_app/03_interpolate_growth_model.R`
+- **Utility Functions**: `growth_model_utils.R` (interpolation, seasonal grouping)
+- **Inputs**:
+ - Combined CI data (RDS from Stage 2)
+ - Harvest data with season dates (Excel)
+- **Outputs**: Interpolated growth model (RDS)
+- **File Location**: `Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds`
+- **File Format**: Long-format data frame with columns:
+ - `Date`, `DOY` (Day of Year), `field`, `subField`, `value`, `season`
+ - `CI_per_day`, `cumulative_CI`, `FitData` (interpolated indicator)
+- **Processing**:
+ - Filters CI by season dates
+ - Linear interpolation across gaps: `approxfun()`
+ - Calculates daily changes and cumulative sums
+ - Groups by field and season year
+- **Key Calculations**:
+ - `CI_per_day` = today's CI - yesterday's CI
+ - `cumulative_CI` = rolling sum of daily CI
+
+### Stage 4: Weekly Mosaic Creation
+- **Script**: `r_app/04_mosaic_creation.R`
+- **Utility Functions**: `mosaic_creation_utils.R`, `mosaic_creation_tile_utils.R`
+- **Inputs**:
+ - Daily VRTs or GeoTIFFs from Stage 1
+ - Field boundaries
+- **Outputs**: Weekly composite mosaic (GeoTIFF)
+- **File Location**: `weekly_mosaic/week_{WW}_{YYYY}.tif`
+- **File Format**: 5-band GeoTIFF (R, G, B, NIR, CI), same spatial extent as daily images
+- **Processing**:
+ - Assesses cloud coverage per daily image
+ - Selects images with acceptable cloud coverage (<45%)
+ - Composites using MAX function (retains highest CI value)
+ - Outputs single weekly composite
+- **Key Parameters**:
+ - Cloud threshold (strict): <5% missing pixels
+ - Cloud threshold (relaxed): <45% missing pixels
+ - Composite function: MAX across selected images
+
+### Stage 5: Field Analysis & KPI Calculation
+- **Script**: `r_app/09_field_analysis_weekly.R` or `09b_field_analysis_weekly.R` (parallel version)
+- **Utility Functions**: `field_analysis_utils.R`, tile extraction functions
+- **Inputs**:
+ - Current week mosaic (GeoTIFF)
+ - Previous week mosaic (GeoTIFF)
+ - Interpolated growth model (RDS)
+ - Field boundaries (GeoJSON)
+ - Harvest data (Excel)
+- **Outputs**:
+ - Excel file: `reports/{project}_field_analysis_week{WW}.xlsx`
+ - RDS summary data: `reports/kpis/{project}_kpi_summary_tables_week{WW}.rds`
+- **File Format (Excel)**:
+ - Sheet 1: Field-level data with CI metrics, phase, status triggers
+ - Sheet 2: Summary statistics (monitored area, cloud coverage, phase distribution)
+- **Processing** (per field):
+ - Extracts CI from current and previous week mosaics
+ - Calculates field-level statistics: mean, std dev, CV (coefficient of variation)
+ - Assigns growth phase based on field age (Germination, Tillering, Grand Growth, Maturation)
+ - Detects status triggers (rapid growth, disease signals, weed pressure, harvest imminence)
+ - Assesses cloud coverage per field
+ - Parallel processing using `furrr` for 1000+ fields
+- **Key Calculations**:
+ - **Uniformity (CV)**: std_dev / mean, thresholds: <0.15 excellent, <0.25 good
+ - **Change**: (current_mean - previous_mean) / previous_mean
+ - **Phase age**: weeks since planting (from harvest.xlsx season_start)
+ - **Cloud coverage %**: (non-NA pixels / total pixels in field) * 100
+- **Status Triggers** (non-exclusive):
+  - Germination Started: at least 10% of the field's pixels have CI > 2.0
+ - Rapid Growth: CI increase > 0.5 units week-over-week
+ - Slow Growth: CI increase < 0.1 units week-over-week
+ - Non-Uniform Growth: CV > 0.25 (field heterogeneity)
+ - Weed Pressure: Rapid increase (>2.0 CI/week) with moderate area (<25%)
+ - Harvest Imminence: Age > 240 days + CI plateau detected
+
+### Stage 6: Report Generation
+- **Script**: `r_app/10_CI_report_with_kpis_simple.Rmd` (RMarkdown)
+- **Utility Functions**: `report_utils.R` (doc building, table formatting)
+- **Inputs**:
+ - Weekly mosaics (GeoTIFFs)
+ - KPI data and field analysis (RDS)
+ - Field boundaries, project config
+- **Outputs**:
+ - **Word document** (PRIMARY OUTPUT): `reports/SmartCane_Report_week{WW}_{YYYY}.docx`
+ - **HTML version** (optional): `reports/SmartCane_Report_week{WW}_{YYYY}.html`
+- **Report Contents**:
+ - Executive summary (KPI overview, monitored area, cloud coverage)
+ - Phase distribution tables and visualizations
+ - Status trigger summary (fields with active triggers)
+ - Field-by-field detail pages with CI metrics
+ - Interpretation guides for agronomic thresholds
+- **Report Generation Technology**:
+ - RMarkdown (`.Rmd`) rendered to Word via `officer` and `flextable` packages
+ - Tables with automatic width/height fitting
+ - Column interpretations embedded in reports
+ - Areas reported in both hectares and acres
+
+---
+
+## File Storage Structure
+
+All data persists to the file system. No database writes occur during analysis—only reads for metadata.
+
+```
+laravel_app/storage/app/{project}/
+├── Data/
+│ ├── pivot.geojson # Field boundaries (read-only)
+│ ├── pivot_2.geojson # ESA variant with extra fields
+│ ├── harvest.xlsx # Season dates & yield data (read-only)
+│ ├── vrt/ # Virtual raster files (daily VRTs)
+│ │ └── YYYY-MM-DD.vrt
+│ ├── extracted_ci/
+│ │ ├── daily_vals/
+│ │ │ └── extracted_YYYY-MM-DD_{suffix}.rds # Daily field stats
+│ │ └── cumulative_vals/
+│ │ ├── combined_CI_data.rds # Cumulative CI (wide)
+│ │ └── All_pivots_Cumulative_CI_quadrant_year_v2.rds # Interpolated
+│ └── daily_tiles_split/ # (Optional) Tile-based processing
+│ ├── master_grid_5x5.geojson
+│ └── YYYY-MM-DD/ # Date-specific folders
+│ └── YYYY-MM-DD_01.tif, ..., _25.tif
+│
+├── merged_tif/ # Raw daily satellite images (Stage 1 output)
+│ └── YYYY-MM-DD.tif # 4 bands: R, G, B, NIR
+│
+├── merged_final_tif/ # (Optional) Processed daily images
+│ └── YYYY-MM-DD.tif # 5 bands: R, G, B, NIR, CI
+│
+├── weekly_mosaic/ # Weekly composite mosaics (Stage 4 output)
+│ └── week_WW_YYYY.tif # 5 bands, ISO week numbering
+│
+└── reports/
+ ├── SmartCane_Report_week{WW}_{YYYY}.docx # PRIMARY OUTPUT (Stage 6)
+ ├── SmartCane_Report_week{WW}_{YYYY}.html # Alternative format
+ ├── {project}_field_analysis_week{WW}.xlsx # PRIMARY OUTPUT (Stage 5)
+ ├── {project}_harvest_predictions_week{WW}.xlsx # Harvest tracking
+ ├── {project}_cloud_coverage_week{WW}.rds # Per-field cloud stats
+ ├── {project}_kpi_summary_tables_week{WW}.rds # Summary data (consumed by reports)
+ └── kpis/
+ └── week_WW_YYYY/
+ └── *.csv # Individual KPI exports
+```
+
+### Data Types by File
+
+| File Extension | Purpose | Stage | Example Files |
+|---|---|---|---|
+| `.tif` | Geospatial raster imagery | 1, 4 | `YYYY-MM-DD.tif`, `week_41_2025.tif` |
+| `.vrt` | Virtual raster (pointer to TIFFs) | 2 | `YYYY-MM-DD.vrt` |
+| `.rds` | R serialized data (binary format) | 2, 3, 5, 6 | `combined_CI_data.rds`, `{project}_kpi_summary_tables_week41.rds` |
+| `.geojson` | Field boundaries (read-only) | Input | `pivot.geojson` |
+| `.xlsx` | Excel reports & harvest data | 5, 6 (output), Input (harvest) | `field_analysis_week41.xlsx` |
+| `.docx` | Word reports (final output) | 6 | `SmartCane_Report_week41_2025.docx` |
+| `.html` | HTML reports (alternative) | 6 | `SmartCane_Report_week41_2025.html` |
+| `.csv` | Summary tables (for external use) | 5, 6 | `field_details.csv`, `kpi_summary.csv` |
+
+---
+
+## Script Dependency Map
+
+```
+01_create_master_grid_and_split_tiffs.R (Optional)
+ └→ [Utility] parameters_project.R
+
+02_ci_extraction.R
+ ├→ [Utility] parameters_project.R
+ └→ [Utility] ci_extraction_utils.R
+ └ Functions: find_satellite_images(), process_satellite_images(),
+ process_ci_values(), process_ci_values_from_tiles()
+
+03_interpolate_growth_model.R
+ ├→ [Utility] parameters_project.R
+ └→ [Utility] growth_model_utils.R
+ └ Functions: load_combined_ci_data(), generate_interpolated_ci_data(),
+ calculate_growth_metrics(), save_growth_model()
+
+04_mosaic_creation.R
+ ├→ [Utility] parameters_project.R
+ └→ [Utility] mosaic_creation_utils.R
+ └ Functions: create_weekly_mosaic_from_tiles(), save_mosaic(),
+ assess_cloud_coverage()
+
+09_field_analysis_weekly.R (or 09b_field_analysis_weekly.R - parallel version)
+ ├→ [Utility] parameters_project.R
+ ├→ [Utility] field_analysis_utils.R
+ └→ Outputs: Excel files, RDS summary files
+ └ Functions: load_ci_data(), analyze_field_stats(),
+ assign_growth_phase(), detect_triggers(),
+ export_to_excel()
+
+10_CI_report_with_kpis_simple.Rmd (RMarkdown → rendered to .docx/.html)
+ ├→ [Utility] parameters_project.R
+ ├→ [Utility] report_utils.R
+ └→ [Packages] officer, flextable
+ └ Functions: body_add_flextable(), add_paragraph(),
+ officer::read_docx(), save_docx()
+```
+
+### Utility Files Description
+
+- **`parameters_project.R`**: Loads project configuration (paths, field boundaries, harvest data, project metadata)
+- **`ci_extraction_utils.R`**: CI calculation, field masking, RDS I/O for daily & cumulative CI data
+- **`growth_model_utils.R`**: Linear interpolation, seasonal grouping, daily metrics calculation
+- **`mosaic_creation_utils.R`**: Weekly mosaic compositing, cloud assessment, raster masking
+- **`field_analysis_utils.R`**: Per-field statistics, phase assignment, trigger detection, Excel export
+- **`report_utils.R`**: RMarkdown helpers, table formatting, Word document building via `officer` package
+
+---
+
+## Data Type Reference
+
+### RDS (R Data Serialization)
+
+RDS files store R data objects in binary format. They preserve data types, dimensions, and structure perfectly. Key RDS files in the pipeline:
+
+| File | Structure | Rows | Columns | Use |
+|---|---|---|---|---|
+| `combined_CI_data.rds` | Data frame (wide format) | # fields | # dates | All-time CI by field |
+| `All_pivots_Cumulative_CI_quadrant_year_v2.rds` | Data frame (long format) | ~1M+ rows | 11 columns | Interpolated daily CI, used for yield models |
+| `kpi_summary_tables_week{WW}.rds` | List of data frames | — | varies | Field KPIs, phase dist., triggers |
+| `cloud_coverage_week{WW}.rds` | Data frame | # fields | 4 columns | Per-field cloud %, category |
+
+### Excel (.xlsx)
+
+Primary output format for stakeholder consumption:
+
+| Sheet | Content | Rows | Columns | Key Data |
+|---|---|---|---|---|
+| Field Data | Field-by-field analysis | # fields | ~15 | CI mean/std, phase, status, cloud% |
+| Summary | Farm-wide statistics | 10-20 | 3 | Monitored area (ha/acres), cloud dist., phases |
+
+### Word (.docx)
+
+Executive report format via RMarkdown → `officer`:
+
+- Title page with metadata (project, week, date, total fields, acreage)
+- Executive summary with KPIs
+- Phase analysis section with distribution tables
+- Status trigger summary
+- Field-by-field detail pages
+- Interpretation guides
+
+---
+
+## Key Calculations & Thresholds
+
+### Canopy Index (CI)
+
+```
+CI = (NIR / Green) - 1
+
+Range: -1 to +∞
+Interpretation:
+ CI < 0 → Non-vegetated (water, bare soil)
+ 0 < CI < 1 → Sparse vegetation (early growth)
+ 1 < CI < 2 → Moderate vegetation
+ CI > 2 → Dense vegetation (mature crop)
+```
+
+### Growth Phase Assignment (Age-Based)
+
+Based on weeks since planting (`season_start` from harvest.xlsx):
+
+| Phase | Age Range | Characteristics |
+|---|---|---|
+| Germination | 0-6 weeks | Variable emergence, low CI |
+| Tillering | 6-18 weeks | Shoot development, increasing CI |
+| Grand Growth | 18-35 weeks | Peak growth, high CI accumulation |
+| Maturation | 35+ weeks | Sugar accumulation, plateau or decline |
+
+### Field Uniformity (Coefficient of Variation)
+
+```
+CV = std_dev / mean
+
+Interpretation:
+ CV < 0.15 → Excellent uniformity
+ CV < 0.25 → Good uniformity
+ CV < 0.35 → Moderate uniformity
+ CV ≥ 0.35 → Poor uniformity (management attention needed)
+```
+
+### Cloud Coverage Classification (Per-Field)
+
+```
+cloud_pct = (non_NA_pixels / total_pixels) * 100   # NOTE(review): despite the name, this is the cloud-FREE (valid-pixel) percentage — higher is clearer
+
+Categories:
+ ≥99.5% → Clear view (usable for analysis)
+  >0% and <99.5% → Partial coverage (biased estimates)
+ 0% → No image available (excluded from analysis)
+```
+
+### Status Triggers (Non-Exclusive)
+
+Fields can have multiple simultaneous triggers:
+
+| Trigger | Detection Method | Data Used |
+|---|---|---|
+| **Germination Started** | ≥10% of field pixels with CI > 2.0 | Current week CI extraction |
+| **Rapid Growth** | Week-over-week increase > 0.5 CI units | Mosaic-based extraction |
+| **Slow Growth** | Week-over-week increase < 0.1 CI units | Mosaic-based extraction |
+| **Non-Uniform** | CV > 0.25 | Spatial stats per field |
+| **Weed Pressure** | Rapid increase (>2.0 CI/week) + area <25% | Spatial clustering analysis |
+| **Harvest Imminence** | Age > 240 days + CI plateau | Temporal analysis, phase assignment |
+
+---
+
+## Processing Configuration & Parameters
+
+All parameters are configurable via command-line arguments or environment variables:
+
+### Download Stage (Python)
+- `DATE`: End date for download (YYYY-MM-DD), default: today
+- `DAYS`: Days lookback, default: 7
+- `resolution`: Output resolution in meters, default: 3
+- `max_threads`: Concurrent download threads, default: 15
+- Grid split: `(5, 5)` bounding boxes (hardcoded)
+
+### CI Extraction Stage (R)
+- `end_date`: End date (YYYY-MM-DD)
+- `offset`: Days lookback (default: 7)
+- `project_dir`: Project directory name (required)
+- `data_source`: Source folder (merged_tif_8b, merged_tif, or merged_final_tif)
+- Auto-detection: If `daily_tiles_split/` exists, uses tile-based processing
+
+### Mosaic Creation Stage (R)
+- `end_date`: End date
+- `offset`: Days lookback
+- `project_dir`: Project directory
+- `file_name`: Custom output filename (optional)
+- Cloud thresholds: 5% (strict), 45% (relaxed) - hardcoded
+
+### Field Analysis Stage (R)
+- `end_date`: End date
+- `project_dir`: Project directory
+- Parallel workers: Auto-detected via `future::plan()` or user-configurable
+- Thresholds: CV, change, weed detection - configurable in code
+
+---
+
+## Database Usage
+
+The system does NOT write to the database during analysis. Database tables (`project_reports`, `project_mosaics`, `project_mailings`) are maintained by the Laravel application for:
+
+- Report metadata tracking
+- Email delivery history
+- Report version control
+
+File system is the single source of truth for all analysis data.
+
FieldBoundaries --> KPIScript
HarvestData --> KPIScript
InterpolatedModel --> KPIScript