Refactor full pipeline script to include intelligent checking of existing outputs and dynamic execution of scripts. Added new Python scripts for RGB validation and evaluation template creation. Enhanced error handling and logging throughout the pipeline.

This commit is contained in:
Timon 2026-01-27 08:58:06 +01:00
parent 4143bdf4d7
commit 9a55d2fcf8
14 changed files with 1035 additions and 568 deletions

View file

@ -1,4 +1,4 @@
"""
r"""
Script: 02_harvest_imminent_weekly.py
Purpose: WEEKLY MONITORING - Run WEEKLY/DAILY to get real-time harvest status for all fields
@ -38,12 +38,12 @@ Use Cases:
- Feed into 09b script for weekly dashboard reports
Usage:
python 02_harvest_imminent_weekly.py [project_name]
python python_app/31_harvest_imminent_weekly.py angata
Examples:
python 02_harvest_imminent_weekly.py angata
python 02_harvest_imminent_weekly.py esa
python 02_harvest_imminent_weekly.py chemba
python python_app/31_harvest_imminent_weekly.py angata
python python_app/31_harvest_imminent_weekly.py esa
python python_app/31_harvest_imminent_weekly.py chemba
If no project specified, defaults to 'angata'
"""
@ -264,7 +264,7 @@ def main():
# [3] Load model (from python_app directory)
print("\n[3/5] Loading Model 307...")
model_dir = Path(".") # Current directory is python_app/, contains model.pt, config.json, scalers.pkl
model_dir = Path("python_app") # Model files located in python_app/ directory
model, config, scalers = load_model_and_config(model_dir)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" Device: {device}")

View file

@ -1,23 +1,25 @@
#!/usr/bin/env python
"""
Batch RGB Validation for Top 50 Largest Fields
Batch RGB Validation for Top 100 Largest Fields - V3
Generates 5x3 RGB temporal grids for the latest complete harvest season of the 50 largest fields.
Same as v1 but with dynamic image selection (checks for actual data, skips empty/black images).
Generates 5x3 RGB temporal grids for the latest complete harvest season of the 100 largest fields.
Uses actual season_end dates from harvest.xlsx for visual validation of field conditions at harvest.
Configuration:
- GeoJSON: pivot.geojson (defines field boundaries and sizes)
- Harvest data: harvest.xlsx (season_end dates for completed harvests)
- CI data: ci_data_for_python.csv
- Output: RGB directory with field_name_YYYYMMDD_harvest_rgb.png
Usage:
python batch_rgb_validation_top_fields.py
python batch_rgb_validation_top_fields_v3.py --field 1
python batch_rgb_validation_top_fields_v3.py
Output:
- Saves 5x3 RGB grids to: laravel_app/storage/app/angata/RGB/
- Filenames: field_<NAME>_<YYYYMMDD>_harvest_rgb.png
- Each grid shows 15 images at 7-day intervals around the season_end date
- Each grid shows 15 images around the harvest date (dynamic date selection, skips empty images)
"""
import json
@ -26,6 +28,7 @@ import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import sys
import argparse
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
@ -189,16 +192,26 @@ def load_harvest_dates_from_xlsx(harvest_xlsx_path, top_50_fields_df):
def main():
parser = argparse.ArgumentParser(description='RGB validation of harvest dates using satellite imagery (v3 - dynamic)')
parser.add_argument('--field', type=str, default=None, help='Specific field ID to validate (e.g., "1" or "10022")')
parser.add_argument('--project', type=str, default='angata', help='Project name (default: angata)')
args = parser.parse_args()
print("="*90)
print("BATCH RGB VALIDATION - TOP 50 LARGEST FIELDS")
print("Visual inspection of latest harvest dates from harvest.xlsx using RGB imagery")
if args.field:
print(f"RGB VALIDATION V3 - SINGLE FIELD: {args.field}")
else:
print("RGB VALIDATION V3 - TOP 50 LARGEST FIELDS")
print("Visual inspection of harvest dates from harvest.xlsx using RGB imagery (dynamic selection)")
print("="*90)
# Configuration
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
harvest_xlsx = Path("laravel_app/storage/app/angata/Data/harvest.xlsx")
output_dir = Path("laravel_app/storage/app/angata/RGB")
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
project = args.project
geojson_path = Path(f"laravel_app/storage/app/{project}/Data/pivot.geojson")
harvest_xlsx = Path(f"laravel_app/storage/app/{project}/Data/harvest.xlsx")
output_dir = Path(f"laravel_app/storage/app/{project}/RGB")
tiff_dir = Path(f"laravel_app/storage/app/{project}/merged_final_tif/5x5")
# Verify paths
if not geojson_path.exists():
@ -213,18 +226,83 @@ def main():
output_dir.mkdir(parents=True, exist_ok=True)
# Step 1: Load GeoJSON and get top 50 largest fields
print("\n[1/4] Loading GeoJSON and identifying top 50 largest fields...")
# Handle single field mode
if args.field:
print(f"\n[1/3] Loading harvest data for field {args.field}...")
harvest_df = pd.read_excel(harvest_xlsx)
harvest_df['season_end'] = pd.to_datetime(harvest_df['season_end'], errors='coerce')
harvest_df['field'] = harvest_df['field'].astype(str).str.strip()
field_records = harvest_df[harvest_df['field'] == args.field]
field_records = field_records[field_records['season_end'].notna()]
if len(field_records) == 0:
print(f"✗ No harvest data found for field {args.field}")
return
# Get latest harvest for this field
latest_idx = field_records['season_end'].idxmax()
latest_row = field_records.loc[latest_idx]
harvest_date = latest_row['season_end']
print(f" ✓ Found harvest: {harvest_date.strftime('%Y-%m-%d')}")
# Load field name from GeoJSON
print(f"\n[2/3] Loading field name from GeoJSON...")
with open(geojson_path) as f:
geojson_data = json.load(f)
field_name = f"field_{args.field}"
for feature in geojson_data.get('features', []):
props = feature.get('properties', {})
if str(props.get('field', '')) == args.field:
field_name = props.get('name', field_name)
break
print(f" ✓ Field name: {field_name}")
# Generate RGB grid
print(f"\n[3/3] Generating RGB validation grid (v3 dynamic)...")
results = generate_rgb_grids(
field_data=None,
field_id=args.field,
registered_harvest_dates=[],
predicted_harvest_dates=[
{
'harvest_date': harvest_date,
'model_name': 'harvest_xlsx'
}
],
output_dir=str(output_dir),
tiff_dir=str(tiff_dir),
geojson_path=str(geojson_path)
)
print("\n" + "="*90)
if results['predicted']:
print(f"✓ RGB grid generated successfully!")
print(f" Field: {field_name} (ID: {args.field})")
print(f" Harvest date: {harvest_date.strftime('%Y-%m-%d')}")
print(f" Output: {output_dir}")
else:
print(f"⚠ No RGB grid generated (no imagery available)")
print("="*90)
return
# Batch mode for top 100 fields
print(f"\n[1/4] Loading GeoJSON and identifying top 100 largest fields...")
fields_df = load_geojson_and_calculate_areas(geojson_path)
if fields_df is None:
return
top_50_fields = fields_df.head(50)
print(f" ✓ Selected {len(top_50_fields)} largest fields for processing")
top_100_fields = fields_df.head(100)
print(f" ✓ Selected {len(top_100_fields)} largest fields for processing")
# Step 2: Load harvest dates from Excel
print("\n[2/4] Loading harvest dates from Excel (latest complete seasons)...")
harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_50_fields)
harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_100_fields)
if len(harvest_dates) == 0:
print("✗ No harvest dates found in Excel file")
@ -237,7 +315,7 @@ def main():
print(f" ... and {len(harvest_dates) - 5} more")
# Step 3: Generate RGB grids for each field
print("\n[3/4] Generating RGB validation grids...")
print("\n[3/4] Generating RGB validation grids (v3 dynamic)...")
rgb_count = 0
for idx, (field_id, harvest_info) in enumerate(harvest_dates.items(), 1):

View file

@ -0,0 +1,193 @@
"""
Create an Excel evaluation template for RGB harvest date predictions.
Parses field names and dates directly from RGB image filenames.
"""
import os
import glob
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
import re
from datetime import datetime
# Configuration
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
RGB_DIR = os.path.join(BASE_DIR, "laravel_app", "storage", "app", "angata", "RGB")
OUTPUT_PATH = os.path.join(BASE_DIR, "laravel_app", "storage", "app", "angata", "RGB_Evaluation_Template.xlsx")
# Evaluators
EVALUATORS = ["Joey", "Daniel", "Nik", "Dimitra", "Timon"]
def parse_rgb_filenames(rgb_dir=None):
    """
    Parse field IDs and harvest dates from RGB image filenames.

    Expected filename format:
        field_{field_id}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png
    where field_id may contain non-digit characters (e.g. "91&92").

    Args:
        rgb_dir: Directory to scan for RGB images. Defaults to the
            module-level RGB_DIR when None (backward compatible).

    Returns:
        list[dict]: One dict per matching image with keys "field_id",
        "harvest_date" (formatted YYYY-MM-DD, or the raw 8-digit token if
        it fails to parse) and "filename", sorted in natural order of
        field_id (digit runs compared as integers, text runs as strings).
    """
    search_dir = RGB_DIR if rgb_dir is None else rgb_dir
    fields_data = []
    rgb_files = glob.glob(os.path.join(search_dir, "field_*.png"))
    for filepath in sorted(rgb_files):
        filename = os.path.basename(filepath)
        # Non-greedy field id so the mandatory 8-digit date anchors the split.
        match = re.match(r"field_(.+?)_(\d{8})_harvest_xlsx_harvest_rgb\.png", filename)
        if not match:
            continue
        field_id = match.group(1)  # e.g., "1000" or "91&92"
        date_str = match.group(2)  # e.g., "20250814"
        # Format date as YYYY-MM-DD; keep the raw token rather than drop the row.
        try:
            harvest_date = datetime.strptime(date_str, "%Y%m%d").strftime("%Y-%m-%d")
        except ValueError:
            harvest_date = date_str
        fields_data.append({
            "field_id": field_id,
            "harvest_date": harvest_date,
            "filename": filename,
        })

    # Natural sort: digit runs compare as integers, text runs as strings.
    # (The previous key only stripped a hard-coded "&92" suffix and compared
    # lexicographically, which ordered "1000" before "2".)
    def _natural_key(entry):
        parts = re.split(r"(\d+)", entry["field_id"])
        return [(0, int(p)) if p.isdigit() else (1, p) for p in parts]

    return sorted(fields_data, key=_natural_key)
def create_evaluation_template():
    """
    Build the Excel evaluation workbook and save it to OUTPUT_PATH.

    Creates two sheets:
      * "Evaluation": one row per RGB image (field id + predicted harvest
        date parsed from the filename) plus one empty response column per
        evaluator in EVALUATORS.
      * "Instructions": reviewer guidance text.

    Side effects: reads filenames via parse_rgb_filenames() from RGB_DIR
    and writes the workbook to OUTPUT_PATH. Returns None in all cases.
    """
    print("Loading field data from RGB images...")
    fields_data = parse_rgb_filenames()
    if not fields_data:
        print("ERROR: No RGB images found in", RGB_DIR)
        return
    print(f"Found {len(fields_data)} RGB images")

    # Create workbook
    wb = Workbook()

    # === SHEET 1: Evaluation Form ===
    ws_eval = wb.active
    ws_eval.title = "Evaluation"

    # Define styles shared by header and data cells
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    header_font = Font(bold=True, color="FFFFFF", size=11)
    border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin')
    )
    center_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    left_align = Alignment(horizontal="left", vertical="center", wrap_text=True)

    # Column headers: two fixed columns, then one per evaluator
    headers = ["Field ID", "Predicted Harvest Date"] + EVALUATORS
    for col_idx, header in enumerate(headers, start=1):
        cell = ws_eval.cell(row=1, column=col_idx, value=header)
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = center_align
        cell.border = border

    # Set column widths
    ws_eval.column_dimensions['A'].width = 15
    ws_eval.column_dimensions['B'].width = 20
    for col_idx in range(3, 3 + len(EVALUATORS)):
        ws_eval.column_dimensions[get_column_letter(col_idx)].width = 12

    # Add data rows (row 1 is the header)
    for row_idx, field in enumerate(fields_data, start=2):
        ws_eval.cell(row=row_idx, column=1, value=field["field_id"])
        ws_eval.cell(row=row_idx, column=2, value=field["harvest_date"])
        # Add empty cells for evaluator responses
        for col_idx in range(3, 3 + len(EVALUATORS)):
            cell = ws_eval.cell(row=row_idx, column=col_idx)
            cell.alignment = center_align
            cell.border = border
        # Light alternating row color for readability
        if row_idx % 2 == 0:
            for col_idx in range(1, 3 + len(EVALUATORS)):
                ws_eval.cell(row=row_idx, column=col_idx).fill = PatternFill(
                    start_color="D9E8F5", end_color="D9E8F5", fill_type="solid"
                )
        # Apply borders to all data cells; left-align the two text columns
        for col_idx in range(1, 3 + len(EVALUATORS)):
            ws_eval.cell(row=row_idx, column=col_idx).border = border
            if col_idx == 1 or col_idx == 2:
                ws_eval.cell(row=row_idx, column=col_idx).alignment = left_align

    # Freeze the header row and the two leading columns
    ws_eval.freeze_panes = "C2"

    # === SHEET 2: Instructions ===
    ws_instr = wb.create_sheet("Instructions")
    # NOTE: grids are 5x3 (15 images) per the generator script, and
    # filenames embed the date as YYYYMMDD (no dashes).
    instr_content = [
        ["RGB Evaluation Instructions", ""],
        ["", ""],
        ["Overview:", ""],
        ["The generated RGB images visualize the predicted harvest dates for each field.", ""],
        ["The images are 5x3 grids showing satellite imagery from different dates", ""],
        ["centered on the predicted harvest date (the center/red-box image).", ""],
        ["", ""],
        ["What to Evaluate:", ""],
        ["For each field, determine if the predicted harvest date is CORRECT:", ""],
        ["", ""],
        ["Instructions for Reviewing:", ""],
        ["1. Look at the CENTER image (red box) - this is the predicted harvest date", ""],
        ["2. Compare to surrounding dates (before and after)", ""],
        ["3. Look for change in field color/status:", ""],
        ["   • BEFORE: Field appears GREEN (growing/healthy crop)", ""],
        ["   • AT PREDICTED DATE: Field shows BROWN/YELLOW (soil visible, ripe for harvest)", ""],
        ["   • AFTER: Field continues BROWN (post-harvest or dormant)", ""],
        ["", ""],
        ["How to Enter Your Assessment:", ""],
        ["Enter one of the following in your evaluator column for each field:", ""],
        ["   • YES = Predicted date is CORRECT (brown/harvest-ready at center date)", ""],
        ["   • NO = Predicted date is INCORRECT (not ready or already harvested)", ""],
        ["   • ? or MAYBE = Uncertain (cloudy images, unclear field status)", ""],
        ["", ""],
        ["Workflow Options:", ""],
        ["Option A (Divide Work): Assign 2-3 fields per evaluator (rows divided by column)", ""],
        ["Option B (Full Review): Each evaluator reviews all fields (everyone fills all rows)", ""],
        ["Option C (Spot Check): Each evaluator samples 5-10 random fields", ""],
        ["", ""],
        ["Image Location:", ""],
        ["All RGB images are in: /laravel_app/storage/app/angata/RGB/", ""],
        ["Format: field_{FIELD_ID}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png", ""],
        ["", ""],
        ["Notes:", ""],
        ["• Cloud cover may obscure ground truth - use best judgment", ""],
        ["• Fields with multiple bands or irregular shapes: focus on dominant area", ""],
        ["• Use context from previous/next dates to validate your assessment", ""],
    ]

    # Render instructions: title row bold+large, section headers bold
    for row_idx, row_data in enumerate(instr_content, start=1):
        for col_idx, value in enumerate(row_data, start=1):
            cell = ws_instr.cell(row=row_idx, column=col_idx, value=value)
            if row_idx == 1:
                cell.font = Font(bold=True, size=14)
            elif any(keyword in str(value) for keyword in ["Overview:", "Instructions", "Workflow", "Image Location", "Notes"]):
                cell.font = Font(bold=True, size=11)
            cell.alignment = Alignment(horizontal="left", vertical="top", wrap_text=True)
    ws_instr.column_dimensions['A'].width = 50
    ws_instr.column_dimensions['B'].width = 80

    # Save workbook
    wb.save(OUTPUT_PATH)
    print(f"✓ Evaluation template created: {OUTPUT_PATH}")
    print(f"{len(fields_data)} fields added to evaluation form")
    print(f"✓ Evaluators: {', '.join(EVALUATORS)}")
if __name__ == "__main__":
create_evaluation_template()

View file

@ -1,107 +0,0 @@
#!/usr/bin/env python
"""
Debug script to find all tiles for a date and check which overlap with field boundary
"""
import json
import sys
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np
import shapely.geometry as shgeom
import pandas as pd

# Load field 79 boundary
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
field_id = "79"

print(f"Loading field {field_id} from GeoJSON...")
with open(geojson_path) as f:
    geojson_data = json.load(f)

# Find the matching feature and build a shapely Polygon from its exterior
# ring (first polygon only for MultiPolygon; holes are ignored — good
# enough for a bounds/overlap debug check).
field_boundary = None
for feature in geojson_data.get('features', []):
    props = feature.get('properties', {})
    if str(props.get('field', '')) == str(field_id):
        geometry = feature.get('geometry')
        if geometry:
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if geom_type == 'MultiPolygon':
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0][0]  # first polygon, exterior ring
                    field_boundary = shgeom.Polygon(coords)
            elif geom_type == 'Polygon':
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0]  # exterior ring
                    field_boundary = shgeom.Polygon(coords)
        break

if field_boundary is None:
    print(f"Field {field_id} not found")
    sys.exit(1)

print(f"Field boundary bounds: {field_boundary.bounds}")
print(f"Field boundary area: {field_boundary.area}")

# Find a specific date directory
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
target_date = pd.Timestamp("2026-01-15")  # Use a recent date that exists

# Collect date directories matching the target date. Directory names look
# like YYYY-MM-DD or YYYY-MM-DD_HH; the date part precedes the underscore.
date_dirs = []
for date_dir in tiff_dir.iterdir():
    if date_dir.is_dir():
        try:
            dir_name = date_dir.name
            date_str = dir_name.split('_')[0]
            tile_date = pd.Timestamp(date_str)
            if tile_date == target_date:
                date_dirs.append(date_dir)
        except ValueError:
            # Directory name is not a parseable date — skip it.
            pass

if not date_dirs:
    print(f"No tiles found for {target_date}")
    sys.exit(1)

print(f"\nFound {len(date_dirs)} date directory(ies) for {target_date}")

for date_dir in date_dirs:
    print(f"\n=== Checking date directory: {date_dir.name} ===")
    tiles = list(date_dir.glob("*.tif"))
    print(f"Found {len(tiles)} tiles in this directory")
    for tile_path in sorted(tiles):
        try:
            with rasterio.open(tile_path) as src:
                tile_bounds = src.bounds
                tile_geom = shgeom.box(*tile_bounds)
                intersects = field_boundary.intersects(tile_geom)
                intersection = field_boundary.intersection(tile_geom) if intersects else None
                intersection_area = intersection.area if intersection else 0
                print(f"\n{tile_path.name}")
                print(f" Tile bounds: {tile_bounds}")
                print(f" Intersects field: {intersects}")
                if intersects:
                    print(f" Intersection area: {intersection_area:.8f}")
                    # Try to mask this tile to the field polygon (bands 1-3)
                    geom = shgeom.mapping(field_boundary)
                    try:
                        masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
                        print(f" ✓ Successfully masked! Shape: {masked_data.shape}")
                        # Check the data in each band
                        for i, band_idx in enumerate([1, 2, 3]):
                            band_data = masked_data[i]
                            non_zero = (band_data != 0).sum()
                            print(f" Band {band_idx}: {non_zero} non-zero pixels out of {band_data.size}")
                    except ValueError as e:
                        print(f" ✗ Masking failed: {e}")
        except Exception as e:
            print(f" Error reading tile: {e}")

View file

@ -1,102 +0,0 @@
#!/usr/bin/env python
"""
Debug script to diagnose why field boundary masking produces no data
"""
import json
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np
import shapely.geometry as shgeom

# Load a sample field boundary
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
field_id = "79" # A field that had issues

print(f"Loading field {field_id} from GeoJSON...")
with open(geojson_path) as f:
    geojson_data = json.load(f)

# Scan features for the matching field id and build a shapely Polygon
# from the exterior ring only (first polygon for MultiPolygon; interior
# rings/holes are ignored).
field_boundary = None
for feature in geojson_data.get('features', []):
    props = feature.get('properties', {})
    if str(props.get('field', '')) == str(field_id):
        geometry = feature.get('geometry')
        if geometry:
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if geom_type == 'MultiPolygon':
                if coordinates and len(coordinates) > 0:
                    # First polygon's exterior ring
                    coords = coordinates[0][0]
                    field_boundary = shgeom.Polygon(coords)
            elif geom_type == 'Polygon':
                if coordinates and len(coordinates) > 0:
                    # Exterior ring
                    coords = coordinates[0]
                    field_boundary = shgeom.Polygon(coords)
        break

if field_boundary is None:
    print(f"Field {field_id} not found")
    exit(1)

print(f"Field boundary bounds: {field_boundary.bounds}")
print(f"Field boundary area: {field_boundary.area}")

# Load a sample TIFF tile: the first .tif larger than ~12 MB.
# (Smaller files are presumed empty — assumes empty tiles are ~11.56 MB,
# TODO confirm this size heuristic against the tile pipeline.)
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
tile_file = None
for date_dir in sorted(tiff_dir.iterdir()):
    if date_dir.is_dir():
        for tif in date_dir.glob("*.tif"):
            if tif.stat().st_size > 12e6:
                tile_file = tif
                break
        if tile_file:
            break

if not tile_file:
    print("No suitable TIFF found")
    exit(1)

print(f"\nTesting with TIFF: {tile_file.name}")
with rasterio.open(tile_file) as src:
    print(f"TIFF Bounds: {src.bounds}")
    print(f"TIFF CRS: {src.crs}")
    # Check if field boundary is within tile bounds
    tile_box = shgeom.box(*src.bounds)
    intersects = field_boundary.intersects(tile_box)
    print(f"Field boundary intersects tile: {intersects}")
    if intersects:
        intersection = field_boundary.intersection(tile_box)
        print(f"Intersection area: {intersection.area}")
        print(f"Intersection bounds: {intersection.bounds}")
    # Try to mask and see what we get (bands 1-3 are read)
    print("\nAttempting to mask...")
    geom = shgeom.mapping(field_boundary)
    try:
        masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
        print(f"Masked data shape: {masked_data.shape}")
        print(f"Masked data dtype: {masked_data.dtype}")
        # Check the data: per-band stats to see whether the mask produced
        # real pixel values or only zeros/NaN
        for i, band_idx in enumerate([1, 2, 3]):
            band_data = masked_data[i]
            print(f"\nBand {band_idx}:")
            print(f" min: {np.nanmin(band_data):.6f}")
            print(f" max: {np.nanmax(band_data):.6f}")
            print(f" mean: {np.nanmean(band_data):.6f}")
            print(f" % valid (non-zero): {(band_data != 0).sum() / band_data.size * 100:.2f}%")
            print(f" % NaN: {np.isnan(band_data).sum() / band_data.size * 100:.2f}%")
            # Show sample values
            valid_pixels = band_data[band_data != 0]
            if len(valid_pixels) > 0:
                print(f" Sample valid values: {valid_pixels[:10]}")
    except ValueError as e:
        print(f"Error during masking: {e}")

View file

@ -1,47 +0,0 @@
#!/usr/bin/env python
"""
Debug script to inspect TIFF file structure and data
"""
import rasterio
from pathlib import Path
import numpy as np

# Root directory holding per-date tile subfolders.
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")


def _first_large_tif(root):
    """Return the first .tif above ~12 MB under root's date folders, or None."""
    for day_dir in sorted(root.iterdir()):
        if not day_dir.is_dir():
            continue
        for candidate in day_dir.glob("*.tif"):
            if candidate.stat().st_size > 12e6:  # Skip empty files
                return candidate
    return None


tile_file = _first_large_tif(tiff_dir)
if not tile_file:
    print("No suitable TIFF files found")
    exit(1)

print(f"Inspecting: {tile_file.name}")
print("=" * 80)

with rasterio.open(tile_file) as src:
    # Dataset-level metadata.
    print(f"Band count: {src.count}")
    print(f"Data type: {src.dtypes[0]}")
    print(f"Shape: {src.height} x {src.width}")
    print(f"CRS: {src.crs}")
    print(f"Bounds: {src.bounds}")
    print()
    # Per-band statistics for at most the first five bands.
    for band_idx in range(1, min(6, src.count + 1)):
        data = src.read(band_idx)
        print(f"Band {band_idx}:")
        print(f" dtype: {data.dtype}")
        print(f" range: {data.min():.6f} - {data.max():.6f}")
        print(f" mean: {data.mean():.6f}")
        print(f" % valid (non-zero): {(data != 0).sum() / data.size * 100:.1f}%")
        print()

View file

@ -4,7 +4,7 @@ Purpose: Download Planet satellite data for missing dates only (skip existing fi
Can be called from batch scripts or other Python scripts.
Usage:
python download_planet_missing_dates.py --start 2022-01-01 --end 2025-12-15 --project angata
python download_planet_missing_dates.py --start 2026-01-17 --end 2026-12-20 --project angata
python download_planet_missing_dates.py --start 2023-06-01 --end 2023-06-30 --project angata --dry-run
Environment variables (alternative to CLI args):

View file

@ -1,29 +0,0 @@
# Merge all CI RDS files into a single CSV
library(tidyverse)

# Paths: input directory of per-client RDS files and the combined CSV output
ci_data_dir <- "r_app/experiments/ci_graph_exploration/CI_data"
output_csv <- "python_app/lstm_ci_data_combined.csv"

# Find all RDS files
rds_files <- list.files(ci_data_dir, pattern = "\\.rds$", full.names = TRUE)
print(paste("Found", length(rds_files), "RDS files"))

# Load and combine all files, tagging each row with the client it came
# from (the client name is the RDS filename without its extension).
combined_data <- tibble()
for (file in rds_files) {
  filename <- basename(file)
  client_name <- sub("\\.rds$", "", filename) # Extract client name from filename
  print(paste("Loading:", filename, "- Client:", client_name))
  data <- readRDS(file)
  data$client <- client_name
  combined_data <- bind_rows(combined_data, data)
}

# Summary of the merged frame before writing
print(paste("Total rows:", nrow(combined_data)))
print(paste("Columns:", paste(names(combined_data), collapse = ", ")))

# Write to CSV
write.csv(combined_data, output_csv, row.names = FALSE)
print(paste("✓ Saved to:", output_csv))

View file

@ -88,95 +88,116 @@ def load_field_boundaries(geojson_path, field_id):
return None, None
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60):
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=None, debug=False):
"""
Find all tile files for target_date (or closest date) that overlap with field_boundary.
Find tile files with actual data (not cloud-masked) for target_date or nearest date.
Tile files are organized in subdirectories by date: 5x5/YYYY-MM-DD_HH/*.tif
Searches by increasing distance from target date until finding tiles with data.
Avoids reusing dates in exclude_dates to ensure temporal diversity in grids.
Args:
target_date (pd.Timestamp): Target date to find tiles near
tiff_dir (Path): Directory containing 5x5 date subdirectories
field_boundary (shapely.Polygon): Field boundary for overlap detection
days_window (int): Max days to search before/after target
exclude_dates (list): List of dates to skip (avoid repetition)
debug (bool): Enable detailed debugging output
Returns:
tuple: (list of tile paths, actual_date, days_diff)
list: tile paths that overlap field
pd.Timestamp: actual date of tiles found
int: days difference from target to actual date found
"""
target_date = pd.Timestamp(target_date)
tiff_dir = Path(tiff_dir)
exclude_dates = exclude_dates or []
exclude_dates = [pd.Timestamp(d) for d in exclude_dates]
if not tiff_dir.exists():
if debug:
print(f" [DEBUG] TIFF dir does not exist: {tiff_dir}")
return [], None, None
# Find all date subdirectories
available_dates = {} # {date: ([tile file paths], actual_dir_name)}
min_size_mb = 12.0 # Empty files are ~11.56 MB
# Build map of all available dates
available_dates = {}
date_parse_errors = 0
for date_dir in tiff_dir.iterdir():
if not date_dir.is_dir():
continue
try:
# Parse date from directory name (YYYY-MM-DD or YYYY-MM-DD_HH)
dir_name = date_dir.name
# Extract just the date part before underscore if it exists
date_str = dir_name.split('_')[0]
tile_date = pd.Timestamp(date_str)
days_diff = (tile_date - target_date).days
if abs(days_diff) > days_window:
continue
# Find all .tif files in this date directory
tile_files = []
for tile_file in date_dir.glob('*.tif'):
# Skip obviously empty files
file_size_mb = tile_file.stat().st_size / (1024 * 1024)
if file_size_mb >= min_size_mb:
# Include ALL tiles, regardless of size
# Some tiles may be small but still contain valid data for specific fields
tile_files.append(tile_file)
if tile_files:
available_dates[tile_date] = (tile_files, dir_name)
except:
pass
except Exception as e:
date_parse_errors += 1
if debug:
print(f" [DEBUG] Failed to parse date from {date_dir.name}: {e}")
if debug:
print(f" [DEBUG] Found {len(available_dates)} dates with tile files, {date_parse_errors} parse errors")
print(f" [DEBUG] Date range: {min(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'} to {max(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'}")
if not available_dates:
return [], None, None
# Find closest date
closest_date = min(available_dates.keys(), key=lambda d: abs((d - target_date).days))
days_diff = (closest_date - target_date).days
tiles, _ = available_dates[closest_date]
# Search dates by increasing distance from target, looking for tiles with actual data
sorted_dates = sorted(available_dates.keys(), key=lambda d: abs((d - target_date).days))
# Filter tiles to only those that overlap field boundary
if rasterio is None or field_boundary is None:
# If rasterio not available, use all tiles (conservative approach)
return tiles, closest_date, days_diff
for search_date in sorted_dates:
# Skip if this date was recently used (avoid temporal repetition)
if search_date in exclude_dates:
continue
tiles, dir_name = available_dates[search_date]
days_diff = (search_date - target_date).days
# Try to find overlapping tiles at this date
overlapping_tiles = []
tile_check_errors = 0
for tile_path in tiles:
try:
with rasterio.open(tile_path) as src:
# Get tile bounds
tile_bounds = src.bounds # (left, bottom, right, top)
tile_bounds = src.bounds
tile_geom = shgeom.box(*tile_bounds)
# Check if tile overlaps field
# Debug first tile
if debug and len(overlapping_tiles) == 0 and tile_check_errors == 0:
print(f" [DEBUG] First tile check for {tile_path.name}:")
print(f" Tile bounds: {tile_bounds}")
print(f" Tile CRS: {src.crs}")
print(f" Field bounds: {field_boundary.bounds}")
print(f" Field geom type: {field_boundary.geom_type}")
print(f" Intersects: {tile_geom.intersects(field_boundary)}")
if tile_geom.intersects(field_boundary):
overlapping_tiles.append(tile_path)
except:
pass
except Exception as e:
tile_check_errors += 1
if debug:
print(f" [DEBUG] Error checking tile {tile_path.name}: {e}")
if not overlapping_tiles:
# No overlapping tiles found, return all tiles for the closest date
return tiles, closest_date, days_diff
if debug:
print(f" [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, {len(overlapping_tiles)} overlap field, {tile_check_errors} errors")
return overlapping_tiles, closest_date, days_diff
if overlapping_tiles:
# Found overlapping tiles, return them
print(f" [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), Tiles: {[Path(t).name for t in overlapping_tiles]}")
return overlapping_tiles, search_date, days_diff
# No overlapping tiles found at all
if debug:
print(f" [DEBUG] No overlapping tiles found for {target_date.strftime('%Y-%m-%d')} within {len(sorted_dates)} searched dates")
return [], None, None
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
@ -211,13 +232,15 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
# For merged_final_tif: bands 1,2,3 are R,G,B
bands_to_read = (1, 2, 3)
# Mask and read bands
# Mask and read bands - extract ONLY the specific field polygon
geom = shgeom.mapping(field_boundary)
try:
masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
# Stack RGB
rgb = np.stack([masked_data[i] for i in range(3)], axis=-1)
except (ValueError, RuntimeError) as e:
# Mask failed - field doesn't overlap this tile or geometry issue
print(f" MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}")
return None
# Convert to float32 if not already
rgb = rgb.astype(np.float32)
@ -247,8 +270,6 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
rgb = np.nan_to_num(rgb, nan=0.0)
return rgb
except ValueError:
return None
except Exception as e:
return None
@ -284,9 +305,16 @@ def load_and_composite_tiles_rgb(tile_paths, field_boundary):
if len(rgb_arrays) == 1:
composited = rgb_arrays[0]
else:
# If multiple tiles, use max composite
# If multiple tiles, need to handle different shapes
# Find common shape or use max/min approach that handles variable sizes
try:
# Try to stack if same shape
stacked = np.stack(rgb_arrays, axis=0)
composited = np.max(stacked, axis=0)
except ValueError:
# Different shapes - use the largest (most complete) tile
# This happens when tiles are masked to different field areas
composited = max(rgb_arrays, key=lambda x: x.size)
composited = composited.astype(np.float32)
@ -307,6 +335,26 @@ def load_and_composite_tiles_rgb(tile_paths, field_boundary):
return None
def has_valid_rgb_data(rgb_data, threshold=0.05):
    """
    Check whether an RGB array contains real imagery (not black/empty).

    Args:
        rgb_data: Array-like of pixel values, or None.
        threshold (float): Minimum NaN-ignoring max value for the image
            to count as non-black. Default 0.05.

    Returns:
        bool: True when the max exceeds ``threshold`` AND the value range
        (max - min) exceeds 0.01 — i.e. the image is neither all-zero/black
        nor a flat constant fill. False for None, empty, or non-numeric input.
    """
    if rgb_data is None:
        return False
    try:
        data_max = np.nanmax(rgb_data)
        data_min = np.nanmin(rgb_data)
    except (TypeError, ValueError):
        # Empty arrays raise ValueError; non-numeric input raises TypeError.
        # (Was a bare except, which also hid unrelated failures.)
        return False
    # bool() normalizes numpy.bool_ so callers can use identity checks.
    return bool(data_max > threshold and (data_max - data_min) > 0.01)
def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary,
title, output_dir, harvest_type='registered', model_name=None, harvest_index=None):
"""
@ -334,91 +382,263 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field
"""
harvest_date = pd.Timestamp(harvest_date)
# Target dates: 15 images at 7-day intervals (8 pre, 1 near, 6 post)
target_dates = [
harvest_date - timedelta(days=56), # T-56d
harvest_date - timedelta(days=49), # T-49d
harvest_date - timedelta(days=42), # T-42d
harvest_date - timedelta(days=35), # T-35d
harvest_date - timedelta(days=28), # T-28d
harvest_date - timedelta(days=21), # T-21d
harvest_date - timedelta(days=14), # T-14d
harvest_date - timedelta(days=7), # T-7d
harvest_date, # T~0d (near harvest)
harvest_date + timedelta(days=7), # T+7d
harvest_date + timedelta(days=14), # T+14d
harvest_date + timedelta(days=21), # T+21d
harvest_date + timedelta(days=28), # T+28d
harvest_date + timedelta(days=35), # T+35d
harvest_date + timedelta(days=42), # T+42d
harvest_date + timedelta(days=56), # T+56d (Note: non-standard to fill 5th col in row 3)
]
# Pre-allocate 15 image slots
rgb_images = [None] * 15
days_offsets = [None] * 15
actual_dates = [None] * 15
used_dates = set() # Use set for efficient lookups
# Find TIFFs for each date
rgb_images = []
days_offsets = []
actual_dates = [] # Store actual dates of TIFFs found
# STEP 0: Debug - List all available dates
print(f" [STEP 0] Checking available TIFF dates in {tiff_dir}...")
available_dates = []
if tiff_dir.exists():
for date_folder in sorted(tiff_dir.iterdir()):
if date_folder.is_dir():
try:
date_obj = datetime.strptime(date_folder.name, '%Y-%m-%d').date()
available_dates.append(date_obj)
except:
pass
print(f" Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)")
for target in target_dates:
tile_paths, actual_date, days_diff = find_overlapping_tiles(target, tiff_dir, field_boundary, days_window=60)
# STEP 1: Find anchor image (closest to predicted harvest date) FIRST
# Search within ±14 days of predicted harvest date first, then expand if needed
print(f" [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...")
anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
harvest_date, tiff_dir, field_boundary, days_window=14, exclude_dates=[], debug=False
)
if not tile_paths or actual_date is None:
rgb_images.append(None)
days_offsets.append(None)
actual_dates.append(None)
print(f" ⚠ No tiles found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data")
continue
anchor_rgb = None
anchor_idx = 8 # Position 8 is the center (T~0d / harvest date position)
failed_anchor_dates = [] # Track dates that failed validation
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
rgb_images.append(rgb)
days_offsets.append(days_diff)
actual_dates.append(actual_date)
if rgb is not None:
print(f" ✓ Loaded {len(tile_paths)} tile(s) for {actual_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from target)")
if anchor_tile_paths and anchor_date:
anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
rgb_images[anchor_idx] = anchor_rgb
days_offsets[anchor_idx] = 0 # Anchor is reference point
actual_dates[anchor_idx] = anchor_date
used_dates.add(anchor_date)
print(f" ✓ ANCHOR FOUND (±14d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
else:
print(f" ⚠ Loaded {len(tile_paths)} tile(s) but RGB data is None")
failed_anchor_dates.append(anchor_date)
print(f" ⚠ Found date {anchor_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data")
print(f" [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...")
anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=set(failed_anchor_dates), debug=False
)
if anchor_tile_paths and anchor_date:
anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
rgb_images[anchor_idx] = anchor_rgb
days_offsets[anchor_idx] = 0 # Anchor is reference point
actual_dates[anchor_idx] = anchor_date
used_dates.add(anchor_date)
print(f" ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
else:
failed_anchor_dates.append(anchor_date)
print(f" ✗ No valid anchor found even within ±60 days")
else:
print(f" ✗ No tiles found for any date within ±60 days")
else:
print(f" ⚠ No tiles found within ±14 days, expanding search...")
anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=[], debug=False
)
if anchor_tile_paths and anchor_date:
anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
rgb_images[anchor_idx] = anchor_rgb
days_offsets[anchor_idx] = 0 # Anchor is reference point
actual_dates[anchor_idx] = anchor_date
used_dates.add(anchor_date)
print(f" ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
else:
print(f" ✗ No valid anchor found even within ±60 days")
else:
print(f" ✗ No tiles found for any date within ±60 days")
# STEP 2: Dynamically collect images BEFORE anchor date
# Strategy: Go backwards from anchor with progressively larger search windows
# Start at 7 days, then try 10, 15, 20, 30+ days apart
print(f" [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...")
before_positions = [7, 6, 5, 4, 3, 2, 1, 0] # Will fill in reverse order (7→0)
before_images = [] # (position, date, rgb, offset)
pos_idx = 0 # Index into before_positions
last_found_date = anchor_date
# Progressive search offsets: try these day offsets in order
search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120] # Days before last found image
while pos_idx < len(before_positions) and last_found_date.year >= 2024:
found_this_iteration = False
# Try each offset until we find a valid image
for days_offset in search_offsets:
search_target_date = last_found_date - timedelta(days=days_offset)
tile_paths, actual_date, days_diff = find_overlapping_tiles(
search_target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=used_dates, debug=False
)
if tile_paths and actual_date:
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
if rgb is not None and has_valid_rgb_data(rgb):
# Found valid image!
overall_max = np.nanmax(rgb)
overall_min = np.nanmin(rgb)
offset_from_anchor = (actual_date - anchor_date).days
before_images.append((before_positions[pos_idx], actual_date, rgb, offset_from_anchor))
used_dates.add(actual_date)
last_found_date = actual_date # Move backwards from this date
print(f" ✓ Before[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}")
pos_idx += 1
found_this_iteration = True
break # Found one, stop trying larger offsets
# If nothing found with any offset, we're done collecting before images
if not found_this_iteration:
break
# Store collected before images
for pos, actual_date, rgb, offset in before_images:
rgb_images[pos] = rgb
actual_dates[pos] = actual_date
days_offsets[pos] = offset
# STEP 3: Dynamically collect images AFTER anchor date
# Strategy: Go forwards from anchor with progressively larger search windows
# Start at 7 days, then try 10, 15, 20, 30+ days apart
print(f" [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...")
after_positions = [9, 10, 11, 12, 13, 14] # Will fill in order (9→14)
after_images = [] # (position, date, rgb, offset)
pos_idx = 0 # Index into after_positions
last_found_date = anchor_date
max_search_date = anchor_date + timedelta(days=200) # Don't search beyond 200 days forward
# Progressive search offsets: try these day offsets in order
search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120] # Days after last found image
while pos_idx < len(after_positions) and last_found_date < max_search_date:
found_this_iteration = False
# Try each offset until we find a valid image
for days_offset in search_offsets:
search_target_date = last_found_date + timedelta(days=days_offset)
# Don't search beyond max date
if search_target_date > max_search_date:
break
tile_paths, actual_date, days_diff = find_overlapping_tiles(
search_target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=used_dates, debug=False
)
if tile_paths and actual_date:
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
if rgb is not None and has_valid_rgb_data(rgb):
# Found valid image!
overall_max = np.nanmax(rgb)
overall_min = np.nanmin(rgb)
offset_from_anchor = (actual_date - anchor_date).days
after_images.append((after_positions[pos_idx], actual_date, rgb, offset_from_anchor))
used_dates.add(actual_date)
last_found_date = actual_date # Move forwards from this date
print(f" ✓ After[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}")
pos_idx += 1
found_this_iteration = True
break # Found one, stop trying larger offsets
# If nothing found with any offset, we're done collecting after images
if not found_this_iteration:
break
# Store collected after images
for pos, actual_date, rgb, offset in after_images:
rgb_images[pos] = rgb
actual_dates[pos] = actual_date
days_offsets[pos] = offset
# Create 5x3 grid plot (15 images)
fig, axes = plt.subplots(3, 5, figsize=(25, 15))
fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")}',
# Build title with anchor offset information
anchor_offset_from_harvest = (actual_dates[8] - harvest_date).days if actual_dates[8] is not None else None
if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0:
anchor_info = f"(Anchor: {actual_dates[8].strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)"
else:
anchor_info = f"(Exact match with anchor: {actual_dates[8].strftime('%Y-%m-%d')})" if actual_dates[8] is not None else ""
fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}',
fontsize=16, fontweight='bold')
# Grid positions (5 columns, 3 rows = 15 images)
positions = [
('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4),
('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('T~0d', 1, 3), ('T+7d', 1, 4),
('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4),
('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4),
]
for idx, (label, row, col) in enumerate(positions): # All 15 images
for idx, (label, row, col) in enumerate(positions):
ax = axes[row, col]
if idx < len(rgb_images) and rgb_images[idx] is not None:
rgb_data = rgb_images[idx]
# Debug: check data range
data_min, data_max = np.nanmin(rgb_data), np.nanmax(rgb_data)
print(f" DEBUG: {label} RGB range: {data_min:.4f} - {data_max:.4f}, shape: {rgb_data.shape}")
# Debug: check data range for ALL bands
data_min = np.nanmin(rgb_data)
data_max = np.nanmax(rgb_data)
data_mean = np.nanmean(rgb_data)
data_std = np.nanstd(rgb_data)
# Check per-band stats
r_min, r_max, r_mean = np.nanmin(rgb_data[:,:,0]), np.nanmax(rgb_data[:,:,0]), np.nanmean(rgb_data[:,:,0])
g_min, g_max, g_mean = np.nanmin(rgb_data[:,:,1]), np.nanmax(rgb_data[:,:,1]), np.nanmean(rgb_data[:,:,1])
b_min, b_max, b_mean = np.nanmin(rgb_data[:,:,2]), np.nanmax(rgb_data[:,:,2]), np.nanmean(rgb_data[:,:,2])
print(f" DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})")
print(f" R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})")
# Display with explicit vmin/vmax to handle normalized 0-1 data
ax.imshow(rgb_data, vmin=0, vmax=1)
# Build title: label + offset + actual date
offset_str = f"{days_offsets[idx]:+d}d" if days_offsets[idx] is not None else "?"
date_str = actual_dates[idx].strftime('%Y-%m-%d') if actual_dates[idx] is not None else "No Date"
ax.set_title(f'{label}\n{offset_str}\n{date_str}', fontsize=10, fontweight='bold')
# Build title: show BOTH anchor offset AND harvest offset
if days_offsets[idx] is not None:
offset_from_anchor = days_offsets[idx]
offset_from_harvest = (actual_dates[idx] - harvest_date).days
# Add red box around harvest date (T~0d at row=1, col=3)
if label == 'T~0d':
if idx == 8: # ANCHOR/HARVEST position
if offset_from_harvest == 0:
offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})"
else:
offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)"
else:
# Show both offsets: from anchor and from harvest
offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}"
else:
offset_str = "No Data"
ax.set_title(offset_str, fontsize=9, fontweight='bold')
# Add red box around the ANCHOR IMAGE (position 8 is harvest/anchor)
if idx == 8: # Position 8 is the anchor
for spine in ax.spines.values():
spine.set_edgecolor('red')
spine.set_linewidth(4)
else:
ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray')
ax.set_title(label, fontsize=10)
ax.set_title('No Data', fontsize=10)
print(f" DEBUG EMPTY {label}: No image data collected")
# Add red box for T~0d even if no data
if label == 'T~0d':
# Add red box for anchor position even if no data
if idx == 8: # Position 8 is the anchor
for spine in ax.spines.values():
spine.set_edgecolor('red')
spine.set_linewidth(4)

View file

@ -6,6 +6,8 @@
#' 2. Create master 5×5 grid covering all TIFFs
#' 3. Split each daily TIFF into 25 tiles using the master grid
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
library(terra)
library(sf)
@ -32,7 +34,7 @@ if (length(args) >= 2) {
}
PROJECT <- "angata"
TIFF_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
@ -42,10 +44,10 @@ GRID_NCOLS <- 5
# Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
# Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("..", "laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")

View file

@ -13,13 +13,13 @@
#
# Examples:
# # Angata 8-band data (with UDM cloud masking)
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/02_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
#
# # Aura 4-band data
# Rscript 02_ci_extraction.R 2025-11-26 7 aura merged_tif
# Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif
#
# # Auto-detects and uses tiles if available:
# Rscript 02_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
# Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
# 1. Load required packages
# -----------------------

View file

@ -8,7 +8,7 @@
#
# Usage: Rscript interpolate_growth_model.R [project_dir]
# - project_dir: Project directory name (e.g., "chemba")
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/03_interpolate_growth_model.R angata
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata
# 1. Load required packages
# -----------------------

View file

@ -186,8 +186,15 @@ main <- function() {
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
} else if (exists("end_date", envir = .GlobalEnv)) {
# For recursive calls, use the end_date that was set in the global environment
get("end_date", envir = .GlobalEnv)
global_date <- get("end_date", envir = .GlobalEnv)
# Check if it's a valid Date with length > 0
if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
global_date
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
@ -210,10 +217,15 @@ main <- function() {
7
}
# Validate end_date is a proper Date object
if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
}
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
message("\n" %+% strrep("=", 70))
message("\n", strrep("=", 70))
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d"))
@ -238,7 +250,7 @@ main <- function() {
# ========== PER-FIELD ANALYSIS (SC-64) ==========
message("\n" %+% strrep("-", 70))
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
message(strrep("-", 70))
@ -694,9 +706,9 @@ main <- function() {
# ========== FINAL SUMMARY ==========
cat("\n" %+% strrep("=", 70) %+% "\n")
cat("\n", strrep("=", 70), "\n")
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
cat(strrep("=", 70) %+% "\n")
cat(strrep("=", 70), "\n")
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
cat("Excel export:", export_paths$excel, "\n")
cat("RDS export:", export_paths$rds, "\n")

View file

@ -1,31 +1,40 @@
# ==============================================================================
# FULL PIPELINE RUNNER
# ==============================================================================
# Runs scripts 02, 03, 04, 09 (KPIs), 09 (Weekly), and 10 (CI Report Simple)
# Mixed Python/R pipeline:
# 1. Python: Download Planet images
# 2. R 10: Create master grid and split TIFFs
# 3. R 20: CI Extraction
# 4. R 21: Convert CI RDS to CSV
# 5. R 30: Interpolate growth model
# 6. Python 31: Harvest imminent weekly
# 7. R 40: Mosaic creation
# 8. R 80: Calculate KPIs
#
# ==============================================================================
# HOW TO RUN THIS SCRIPT
# ==============================================================================
#
# In PowerShell or Command Prompt:
# Run from the smartcane/ directory:
#
# Option 1 (Recommended - shows real-time output):
# Rscript run_full_pipeline.R
# Rscript r_app/run_full_pipeline.R
#
# Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" run_full_pipeline.R
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
#
# Option 3 (Batch mode - output saved to .Rout file):
# R CMD BATCH --vanilla run_full_pipeline.R
# R CMD BATCH --vanilla r_app/run_full_pipeline.R
#
# ==============================================================================
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- "2025-12-24" # or specify: "2025-12-02", Sys.Date()
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
# ***************************
# Format dates
@ -35,116 +44,349 @@ end_date_str <- format(as.Date(end_date), "%Y-%m-%d")
pipeline_success <- TRUE
# ==============================================================================
# SCRIPT 02: CI EXTRACTION
# INTELLIGENT CHECKING: What has already been completed?
# ==============================================================================
cat("\n========== RUNNING SCRIPT 02: CI EXTRACTION ==========\n")
tryCatch({
source("r_app/02_ci_extraction.R")
main() # Call the main function
cat("✓ Script 02 completed\n")
}, error = function(e) {
cat("✗ Error in Script 02:", e$message, "\n")
pipeline_success <<- FALSE
})
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")
# ==============================================================================
# SCRIPT 03: INTERPOLATE GROWTH MODEL
# ==============================================================================
cat("\n========== RUNNING SCRIPT 03: INTERPOLATE GROWTH MODEL ==========\n")
tryCatch({
source("r_app/03_interpolate_growth_model.R")
main() # Call the main function
cat("✓ Script 03 completed\n")
}, error = function(e) {
cat("✗ Error in Script 03:", e$message, "\n")
pipeline_success <<- FALSE
})
# ==============================================================================
# SCRIPT 04: MOSAIC CREATION
# ==============================================================================
cat("\n========== RUNNING SCRIPT 04: MOSAIC CREATION ==========\n")
tryCatch({
source("r_app/04_mosaic_creation.R")
main() # Call the main function
cat("✓ Script 04 completed\n")
}, error = function(e) {
cat("✗ Error in Script 04:", e$message, "\n")
pipeline_success <<- FALSE
})
# ==============================================================================
# SCRIPT 09: CALCULATE KPIs
# ==============================================================================
cat("\n========== RUNNING SCRIPT 09: CALCULATE KPIs ==========\n")
tryCatch({
source("r_app/09_calculate_kpis.R")
main() # Call the main function
cat("✓ Script 09 (KPIs) completed\n")
}, error = function(e) {
cat("✗ Error in Script 09 (KPIs):", e$message, "\n")
pipeline_success <<- FALSE
})
# ==============================================================================
# SCRIPT 09: FIELD ANALYSIS WEEKLY
# ==============================================================================
# Only run field analysis weekly for angata project
if (project_dir == "angata") {
cat("\n========== RUNNING SCRIPT 09: FIELD ANALYSIS WEEKLY ==========\n")
tryCatch({
source("r_app/09_field_analysis_weekly.R")
main() # Call the main function
cat("✓ Script 09 (Weekly) completed\n")
}, error = function(e) {
cat("✗ Error in Script 09 (Weekly):", e$message, "\n")
pipeline_success <<- FALSE
})
# Check Script 10 outputs (tiled splits)
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
tiles_dates <- if (dir.exists(tiles_dir)) {
list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
} else {
cat("\n========== SKIPPING SCRIPT 09: FIELD ANALYSIS WEEKLY (only runs for angata) ==========\n")
c()
}
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
# Check Script 20 outputs (CI extraction) - daily RDS files
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
ci_files <- if (dir.exists(ci_daily_dir)) {
list.files(ci_daily_dir, pattern = "\\.rds$")
} else {
c()
}
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this
# Instead, check if CI RDS files exist - if they do, 21 should also run
# For now, just note that CSV is time-dependent, not a good skip indicator
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
# Check Script 40 outputs (mosaics in weekly_tile_max/5x5)
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
mosaic_files <- if (dir.exists(mosaic_dir)) {
list.files(mosaic_dir, pattern = "\\.tif$")
} else {
c()
}
cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files)))
# Check Script 80 outputs (KPIs in reports/kpis/field_stats)
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
kpi_files <- if (dir.exists(kpi_dir)) {
list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
} else {
c()
}
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
# Determine if scripts should run based on outputs
skip_10 <- length(tiles_dates) > 0 && !force_rerun
skip_20 <- length(ci_files) > 0 && !force_rerun
skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped
skip_40 <- length(mosaic_files) > 0 && !force_rerun
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
cat("\nSkipping decisions:\n")
cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP (tiles exist)" else "RUN"))
cat(sprintf(" Script 20: %s\n", if(skip_20) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 21: %s\n", if(skip_21) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 40: %s\n", if(skip_40) "SKIP (mosaics exist)" else "RUN"))
cat(sprintf(" Script 80: %s\n", if(skip_80) "SKIP (KPIs exist)" else "RUN"))
# ==============================================================================
# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY)
# ==============================================================================
cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
tryCatch({
# Setup paths
base_path <- file.path("laravel_app", "storage", "app", project_dir)
merged_tifs_dir <- file.path(base_path, data_source)
# Get existing dates from raw TIFFs
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
# Get existing dates from tiles (better indicator of completion)
existing_tile_dates <- tiles_dates
# Find missing dates in the window
start_date <- end_date - offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
# Download each missing date
download_count <- 0
download_failed <- 0
if (length(missing_dates) > 0) {
# Save current directory
original_dir <- getwd()
# Change to python_app directory so relative paths work correctly
setwd("python_app")
for (date_str in missing_dates) {
cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str)
result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)
if (result == 0) {
download_count <- download_count + 1
} else {
download_failed <- download_failed + 1
}
}
# Change back to original directory
setwd(original_dir)
}
cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed))
if (download_failed > 0) {
cat("⚠ Some downloads failed, but continuing pipeline\n")
}
# Force Script 10 to run ONLY if downloads actually succeeded (not just attempted)
if (download_count > 0) {
skip_10 <- FALSE
}
}, error = function(e) {
cat("✗ Error in planet download:", e$message, "\n")
pipeline_success <<- FALSE
})
# ==============================================================================
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
# ==============================================================================
if (pipeline_success && !skip_10) {
cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n")
tryCatch({
# Set environment variables for the script (Script 10 uses these for filtering)
assign("PROJECT", project_dir, envir = .GlobalEnv)
# Suppress verbose per-date output, show only summary
sink(nullfile())
source("r_app/10_create_master_grid_and_split_tiffs.R")
sink()
# Verify output
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
if (dir.exists(tiles_dir)) {
subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs)))
} else {
cat("✓ Script 10 completed\n")
}
}, error = function(e) {
sink()
cat("✗ Error in Script 10:", e$message, "\n")
pipeline_success <<- FALSE
})
} else if (skip_10) {
cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n")
}
# ==============================================================================
# SCRIPT 91: CI REPORT ANGATA (only for angata)
# SCRIPT 20: CI EXTRACTION
# ==============================================================================
if (pipeline_success && !skip_20) {
cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
source("r_app/20_ci_extraction.R")
main() # Call main() to execute the script with the environment variables
# Verify CI output was created
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
if (dir.exists(ci_daily_dir)) {
files <- list.files(ci_daily_dir, pattern = "\\.rds$")
cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files)))
} else {
cat("✓ Script 20 completed\n")
}
}, error = function(e) {
cat("✗ Error in Script 20:", e$message, "\n")
pipeline_success <<- FALSE
})
} else if (skip_20) {
cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) ==========\n")
}
# ==============================================================================
# SCRIPT 21: CONVERT CI RDS TO CSV
# ==============================================================================
if (pipeline_success && !skip_21) {
cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
source("r_app/21_convert_ci_rds_to_csv.R")
main() # Call main() to execute the script with the environment variables
# Verify CSV output was created
ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir, "ci_extracted")
if (dir.exists(ci_csv_path)) {
csv_files <- list.files(ci_csv_path, pattern = "\\.csv$")
cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files)))
} else {
cat("✓ Script 21 completed\n")
}
}, error = function(e) {
cat("✗ Error in Script 21:", e$message, "\n")
pipeline_success <<- FALSE
})
} else if (skip_21) {
cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
}
# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
# NOTE(review): this section previously contained interleaved leftover lines
# from the removed Script 91 (Angata CI report) branch — a diff-merge residue
# that broke the control flow. Only the Script 30 logic is kept here.
if (pipeline_success) {
  cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
  tryCatch({
    # Set environment variables for the script
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)
    source("r_app/30_interpolate_growth_model.R")
    main()  # Call main() to execute the script with the environment variables
    # Verify interpolated output
    growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
    if (dir.exists(growth_dir)) {
      files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
      cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
    } else {
      cat("✓ Script 30 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 30:", e$message, "\n")
    pipeline_success <<- FALSE  # abort downstream stages on failure
  })
}
# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
# NOTE(review): this section previously contained interleaved leftover lines
# from the removed Script 10 (simple CI report) branch, plus a stray
# setwd(original_dir) referencing an undefined variable — both removed.
# Failures here are non-fatal (warnings only): the Python step depends on an
# optional harvest.xlsx input.
if (pipeline_success) {
  cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n")
  tryCatch({
    # Run Python script in pytorch_gpu conda environment.
    # Script expects a positional project name (not a --project flag).
    # Run from smartcane root so conda can find the environment.
    cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir)
    cat("DEBUG: Running command:", cmd, "\n")
    result <- system(cmd)
    if (result == 0) {
      # Verify harvest output - check for THIS WEEK's specific file
      current_week <- as.numeric(format(end_date, "%V"))
      current_year <- as.numeric(format(end_date, "%Y"))
      expected_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats",
                                 sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
      if (file.exists(expected_file)) {
        cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", current_week))
      } else {
        cat("✓ Script 31 completed (check if harvest.xlsx is available)\n")
      }
    } else {
      cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n")
    }
  }, error = function(e) {
    # Non-fatal: log and continue; pipeline_success is deliberately untouched.
    cat("⚠ Script 31 error:", e$message, "\n")
  })
}
# ==============================================================================
# SCRIPT 40: MOSAIC CREATION
# ==============================================================================
# Builds the weekly 5x5 tile mosaics. Skipped when the mosaics already exist
# (skip_40) or when an earlier stage failed.
if (pipeline_success && !skip_40) {
  cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
  tryCatch({
    # Publish the parameters that Script 40's main() reads from the global env.
    for (param in c("end_date", "offset", "project_dir", "data_source")) {
      assign(param, get(param), envir = .GlobalEnv)
    }
    source("r_app/40_mosaic_creation.R")
    main()  # executes the sourced script against the globals set above
    # Report how many GeoTIFF mosaics were produced, if the directory exists.
    tile_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
    if (!dir.exists(tile_dir)) {
      cat("✓ Script 40 completed\n")
    } else {
      n_tif <- length(list.files(tile_dir, pattern = "\\.tif$"))
      cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", n_tif))
    }
  }, error = function(e) {
    cat("✗ Error in Script 40:", e$message, "\n")
    pipeline_success <<- FALSE  # halt downstream stages
  })
} else if (skip_40) {
  cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n")
}
# ==============================================================================
# SCRIPT 80: CALCULATE KPIs
# ==============================================================================
# Always runs (when the pipeline is still healthy) — it computes the KPIs for
# the current week regardless of which earlier stages were skipped.
if (pipeline_success) {
  cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
  tryCatch({
    # Publish the fallback parameters Script 80's main() reads from the
    # global env. end_date is already a Date, so it is assigned as-is
    # (no as.Date() re-conversion).
    globals <- list(end_date = end_date,
                    end_date_str = end_date_str,
                    offset = offset,
                    project_dir = project_dir,
                    data_source = data_source)
    for (nm in names(globals)) {
      assign(nm, globals[[nm]], envir = .GlobalEnv)
    }
    source("r_app/80_calculate_kpis.R")
    main()  # executes the sourced script against the globals set above
    # Report how many KPI artifacts (CSV/JSON) were written, if any.
    kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
    if (!dir.exists(kpi_dir)) {
      cat("✓ Script 80 completed\n")
    } else {
      n_kpi <- length(list.files(kpi_dir, pattern = "\\.csv$|\\.json$"))
      cat(sprintf("✓ Script 80 completed - generated %d KPI files\n", n_kpi))
    }
  }, error = function(e) {
    cat("✗ Error in Script 80:", e$message, "\n")
    cat("Full error:\n")
    print(e)  # full condition object for debugging
    pipeline_success <<- FALSE
  })
}
# ==============================================================================
# PIPELINE SUMMARY
# ==============================================================================
# NOTE(review): a git diff hunk header ("@ -154,4 +396,9 @@") had been fused
# into the first cat() line and one removed-side summary line remained; both
# artifacts are cleaned up here.
cat("\n========== PIPELINE COMPLETE ==========\n")
cat(sprintf("Project: %s\n", project_dir))
cat(sprintf("End Date: %s\n", end_date_str))
cat(sprintf("Offset: %d days\n", offset))
if (pipeline_success) {
  cat("Status: ✓ All scripts completed successfully\n")
} else {
  cat("Status: ✗ Pipeline failed - check errors above\n")
}
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")