Refactor full pipeline script to include intelligent checking of existing outputs and dynamic execution of scripts. Added new Python scripts for RGB validation and evaluation template creation. Enhanced error handling and logging throughout the pipeline.

This commit is contained in:
Timon 2026-01-27 08:58:06 +01:00
parent 4143bdf4d7
commit 9a55d2fcf8
14 changed files with 1035 additions and 568 deletions

View file

@ -1,4 +1,4 @@
""" r"""
Script: 02_harvest_imminent_weekly.py Script: 02_harvest_imminent_weekly.py
Purpose: WEEKLY MONITORING - Run WEEKLY/DAILY to get real-time harvest status for all fields Purpose: WEEKLY MONITORING - Run WEEKLY/DAILY to get real-time harvest status for all fields
@ -38,12 +38,12 @@ Use Cases:
- Feed into 09b script for weekly dashboard reports - Feed into 09b script for weekly dashboard reports
Usage: Usage:
python 02_harvest_imminent_weekly.py [project_name] python python_app/31_harvest_imminent_weekly.py angata
Examples: Examples:
python 02_harvest_imminent_weekly.py angata python python_app/31_harvest_imminent_weekly.py angata
python 02_harvest_imminent_weekly.py esa python python_app/31_harvest_imminent_weekly.py esa
python 02_harvest_imminent_weekly.py chemba python python_app/31_harvest_imminent_weekly.py chemba
If no project specified, defaults to 'angata' If no project specified, defaults to 'angata'
""" """
@ -264,7 +264,7 @@ def main():
# [3] Load model (from python_app directory) # [3] Load model (from python_app directory)
print("\n[3/5] Loading Model 307...") print("\n[3/5] Loading Model 307...")
model_dir = Path(".") # Current directory is python_app/, contains model.pt, config.json, scalers.pkl model_dir = Path("python_app") # Model files located in python_app/ directory
model, config, scalers = load_model_and_config(model_dir) model, config, scalers = load_model_and_config(model_dir)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" Device: {device}") print(f" Device: {device}")

View file

@ -1,23 +1,25 @@
#!/usr/bin/env python #!/usr/bin/env python
""" """
Batch RGB Validation for Top 50 Largest Fields Batch RGB Validation for Top 100 Largest Fields - V3
Generates 5x3 RGB temporal grids for the latest complete harvest season of the 50 largest fields. Same as v1 but with dynamic image selection (checks for actual data, skips empty/black images).
Generates 5x3 RGB temporal grids for the latest complete harvest season of the 100 largest fields.
Uses actual season_end dates from harvest.xlsx for visual validation of field conditions at harvest. Uses actual season_end dates from harvest.xlsx for visual validation of field conditions at harvest.
Configuration: Configuration:
- GeoJSON: pivot.geojson (defines field boundaries and sizes) - GeoJSON: pivot.geojson (defines field boundaries and sizes)
- Harvest data: harvest.xlsx (season_end dates for completed harvests) - Harvest data: harvest.xlsx (season_end dates for completed harvests)
- CI data: ci_data_for_python.csv
- Output: RGB directory with field_name_YYYYMMDD_harvest_rgb.png - Output: RGB directory with field_name_YYYYMMDD_harvest_rgb.png
Usage: Usage:
python batch_rgb_validation_top_fields.py python batch_rgb_validation_top_fields_v3.py --field 1
python batch_rgb_validation_top_fields_v3.py
Output: Output:
- Saves 5x3 RGB grids to: laravel_app/storage/app/angata/RGB/ - Saves 5x3 RGB grids to: laravel_app/storage/app/angata/RGB/
- Filenames: field_<NAME>_<YYYYMMDD>_harvest_rgb.png - Filenames: field_<NAME>_<YYYYMMDD>_harvest_rgb.png
- Each grid shows 15 images at 7-day intervals around the season_end date - Each grid shows 15 images around the harvest date (dynamic date selection, skips empty images)
""" """
import json import json
@ -26,6 +28,7 @@ import pandas as pd
from pathlib import Path from pathlib import Path
from datetime import datetime, timedelta from datetime import datetime, timedelta
import sys import sys
import argparse
# Add parent directory to path for imports # Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent)) sys.path.insert(0, str(Path(__file__).parent))
@ -189,16 +192,26 @@ def load_harvest_dates_from_xlsx(harvest_xlsx_path, top_50_fields_df):
def main(): def main():
parser = argparse.ArgumentParser(description='RGB validation of harvest dates using satellite imagery (v3 - dynamic)')
parser.add_argument('--field', type=str, default=None, help='Specific field ID to validate (e.g., "1" or "10022")')
parser.add_argument('--project', type=str, default='angata', help='Project name (default: angata)')
args = parser.parse_args()
print("="*90) print("="*90)
print("BATCH RGB VALIDATION - TOP 50 LARGEST FIELDS") if args.field:
print("Visual inspection of latest harvest dates from harvest.xlsx using RGB imagery") print(f"RGB VALIDATION V3 - SINGLE FIELD: {args.field}")
else:
print("RGB VALIDATION V3 - TOP 50 LARGEST FIELDS")
print("Visual inspection of harvest dates from harvest.xlsx using RGB imagery (dynamic selection)")
print("="*90) print("="*90)
# Configuration # Configuration
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson") project = args.project
harvest_xlsx = Path("laravel_app/storage/app/angata/Data/harvest.xlsx") geojson_path = Path(f"laravel_app/storage/app/{project}/Data/pivot.geojson")
output_dir = Path("laravel_app/storage/app/angata/RGB") harvest_xlsx = Path(f"laravel_app/storage/app/{project}/Data/harvest.xlsx")
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5") output_dir = Path(f"laravel_app/storage/app/{project}/RGB")
tiff_dir = Path(f"laravel_app/storage/app/{project}/merged_final_tif/5x5")
# Verify paths # Verify paths
if not geojson_path.exists(): if not geojson_path.exists():
@ -213,18 +226,83 @@ def main():
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
# Step 1: Load GeoJSON and get top 50 largest fields # Handle single field mode
print("\n[1/4] Loading GeoJSON and identifying top 50 largest fields...") if args.field:
print(f"\n[1/3] Loading harvest data for field {args.field}...")
harvest_df = pd.read_excel(harvest_xlsx)
harvest_df['season_end'] = pd.to_datetime(harvest_df['season_end'], errors='coerce')
harvest_df['field'] = harvest_df['field'].astype(str).str.strip()
field_records = harvest_df[harvest_df['field'] == args.field]
field_records = field_records[field_records['season_end'].notna()]
if len(field_records) == 0:
print(f"✗ No harvest data found for field {args.field}")
return
# Get latest harvest for this field
latest_idx = field_records['season_end'].idxmax()
latest_row = field_records.loc[latest_idx]
harvest_date = latest_row['season_end']
print(f" ✓ Found harvest: {harvest_date.strftime('%Y-%m-%d')}")
# Load field name from GeoJSON
print(f"\n[2/3] Loading field name from GeoJSON...")
with open(geojson_path) as f:
geojson_data = json.load(f)
field_name = f"field_{args.field}"
for feature in geojson_data.get('features', []):
props = feature.get('properties', {})
if str(props.get('field', '')) == args.field:
field_name = props.get('name', field_name)
break
print(f" ✓ Field name: {field_name}")
# Generate RGB grid
print(f"\n[3/3] Generating RGB validation grid (v3 dynamic)...")
results = generate_rgb_grids(
field_data=None,
field_id=args.field,
registered_harvest_dates=[],
predicted_harvest_dates=[
{
'harvest_date': harvest_date,
'model_name': 'harvest_xlsx'
}
],
output_dir=str(output_dir),
tiff_dir=str(tiff_dir),
geojson_path=str(geojson_path)
)
print("\n" + "="*90)
if results['predicted']:
print(f"✓ RGB grid generated successfully!")
print(f" Field: {field_name} (ID: {args.field})")
print(f" Harvest date: {harvest_date.strftime('%Y-%m-%d')}")
print(f" Output: {output_dir}")
else:
print(f"⚠ No RGB grid generated (no imagery available)")
print("="*90)
return
# Batch mode for top 100 fields
print(f"\n[1/4] Loading GeoJSON and identifying top 100 largest fields...")
fields_df = load_geojson_and_calculate_areas(geojson_path) fields_df = load_geojson_and_calculate_areas(geojson_path)
if fields_df is None: if fields_df is None:
return return
top_50_fields = fields_df.head(50) top_100_fields = fields_df.head(100)
print(f" ✓ Selected {len(top_50_fields)} largest fields for processing") print(f" ✓ Selected {len(top_100_fields)} largest fields for processing")
# Step 2: Load harvest dates from Excel # Step 2: Load harvest dates from Excel
print("\n[2/4] Loading harvest dates from Excel (latest complete seasons)...") print("\n[2/4] Loading harvest dates from Excel (latest complete seasons)...")
harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_50_fields) harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_100_fields)
if len(harvest_dates) == 0: if len(harvest_dates) == 0:
print("✗ No harvest dates found in Excel file") print("✗ No harvest dates found in Excel file")
@ -237,7 +315,7 @@ def main():
print(f" ... and {len(harvest_dates) - 5} more") print(f" ... and {len(harvest_dates) - 5} more")
# Step 3: Generate RGB grids for each field # Step 3: Generate RGB grids for each field
print("\n[3/4] Generating RGB validation grids...") print("\n[3/4] Generating RGB validation grids (v3 dynamic)...")
rgb_count = 0 rgb_count = 0
for idx, (field_id, harvest_info) in enumerate(harvest_dates.items(), 1): for idx, (field_id, harvest_info) in enumerate(harvest_dates.items(), 1):

View file

@ -0,0 +1,193 @@
"""
Create an Excel evaluation template for RGB harvest date predictions.
Parses field names and dates directly from RGB image filenames.
"""
import os
import glob
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
import re
from datetime import datetime
# Configuration
# Two `dirname` calls up from this script's absolute path — presumably the
# repository root (TODO confirm the script lives one directory below it).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Input: directory holding the generated RGB validation grid images.
RGB_DIR = os.path.join(BASE_DIR, "laravel_app", "storage", "app", "angata", "RGB")
# Output: where the Excel evaluation template workbook is written.
OUTPUT_PATH = os.path.join(BASE_DIR, "laravel_app", "storage", "app", "angata", "RGB_Evaluation_Template.xlsx")
# Evaluators — one response column per name is added to the evaluation sheet.
EVALUATORS = ["Joey", "Daniel", "Nik", "Dimitra", "Timon"]
def parse_rgb_filenames(rgb_dir=None):
    """
    Parse field IDs and harvest dates from RGB image filenames.

    Expected filename format:
        field_{field_id or name}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png

    Args:
        rgb_dir: Directory to scan for RGB images. Defaults to the
            module-level RGB_DIR when None (backward compatible).

    Returns:
        list[dict]: one dict per matching image with keys "field_id" (str),
            "harvest_date" ("YYYY-MM-DD", or the raw 8-digit string if it
            does not parse), and "filename". Sorted by field ID,
            numerically where the ID starts with digits.
    """
    if rgb_dir is None:
        rgb_dir = RGB_DIR

    fields_data = []
    # Find all RGB images
    for filepath in sorted(glob.glob(os.path.join(rgb_dir, "field_*.png"))):
        filename = os.path.basename(filepath)
        # Pattern: field_{field_id}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png
        match = re.match(r"field_(.+?)_(\d{8})_harvest_xlsx_harvest_rgb\.png", filename)
        if not match:
            continue
        field_id = match.group(1)  # e.g., "1000" or "91&92"
        date_str = match.group(2)  # e.g., "20250814"
        # Normalize the date to YYYY-MM-DD; keep the raw string if malformed.
        try:
            harvest_date = datetime.strptime(date_str, "%Y%m%d").strftime("%Y-%m-%d")
        except ValueError:
            harvest_date = date_str
        fields_data.append({
            "field_id": field_id,
            "harvest_date": harvest_date,
            "filename": filename,
        })

    # FIX: the previous key (stripping a hard-coded "&92" and string-sorting)
    # ordered "10" before "2". Sort IDs with a leading number numerically;
    # purely non-numeric IDs come after, in lexicographic order.
    def _sort_key(entry):
        m = re.match(r"(\d+)", entry["field_id"])
        if m:
            return (0, int(m.group(1)), entry["field_id"])
        return (1, 0, entry["field_id"])

    fields_data.sort(key=_sort_key)
    return fields_data
def _evaluation_styles():
    # Build the shared openpyxl style objects used on the "Evaluation" sheet.
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    header_font = Font(bold=True, color="FFFFFF", size=11)
    border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin')
    )
    center_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    left_align = Alignment(horizontal="left", vertical="center", wrap_text=True)
    return header_fill, header_font, border, center_align, left_align


def _write_evaluation_sheet(ws_eval, fields_data):
    """Fill the "Evaluation" sheet: a styled header row, one data row per
    RGB image, and one empty response column per evaluator."""
    header_fill, header_font, border, center_align, left_align = _evaluation_styles()

    # Column headers: field metadata first, then one column per evaluator.
    headers = ["Field ID", "Predicted Harvest Date"] + EVALUATORS
    for col_idx, header in enumerate(headers, start=1):
        cell = ws_eval.cell(row=1, column=col_idx, value=header)
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = center_align
        cell.border = border

    # Set column widths
    ws_eval.column_dimensions['A'].width = 15
    ws_eval.column_dimensions['B'].width = 20
    for col_idx in range(3, 3 + len(EVALUATORS)):
        ws_eval.column_dimensions[get_column_letter(col_idx)].width = 12

    # Add data rows
    for row_idx, field in enumerate(fields_data, start=2):
        ws_eval.cell(row=row_idx, column=1, value=field["field_id"])
        ws_eval.cell(row=row_idx, column=2, value=field["harvest_date"])
        # Empty, pre-styled cells for the evaluator responses.
        for col_idx in range(3, 3 + len(EVALUATORS)):
            cell = ws_eval.cell(row=row_idx, column=col_idx)
            cell.alignment = center_align
            cell.border = border
        # Light alternating row color for readability.
        if row_idx % 2 == 0:
            for col_idx in range(1, 3 + len(EVALUATORS)):
                ws_eval.cell(row=row_idx, column=col_idx).fill = PatternFill(
                    start_color="D9E8F5", end_color="D9E8F5", fill_type="solid"
                )
        # Borders on all data cells; left-align the two metadata columns.
        for col_idx in range(1, 3 + len(EVALUATORS)):
            ws_eval.cell(row=row_idx, column=col_idx).border = border
            if col_idx == 1 or col_idx == 2:
                ws_eval.cell(row=row_idx, column=col_idx).alignment = left_align

    # Keep the header row and the two metadata columns visible while scrolling.
    ws_eval.freeze_panes = "C2"


def _write_instructions_sheet(ws_instr):
    """Fill the "Instructions" sheet with the review guidelines."""
    instr_content = [
        ["RGB Evaluation Instructions", ""],
        ["", ""],
        ["Overview:", ""],
        ["The generated RGB images visualize the predicted harvest dates for each field.", ""],
        # FIX: grids are 5x3 (15 images), not 3x3 — matches the generator's output.
        ["The images are 5x3 grids showing satellite imagery from different dates", ""],
        ["centered on the predicted harvest date (the center/red-box image).", ""],
        ["", ""],
        ["What to Evaluate:", ""],
        ["For each field, determine if the predicted harvest date is CORRECT:", ""],
        ["", ""],
        ["Instructions for Reviewing:", ""],
        ["1. Look at the CENTER image (red box) - this is the predicted harvest date", ""],
        ["2. Compare to surrounding dates (before and after)", ""],
        ["3. Look for change in field color/status:", ""],
        ["   • BEFORE: Field appears GREEN (growing/healthy crop)", ""],
        ["   • AT PREDICTED DATE: Field shows BROWN/YELLOW (soil visible, ripe for harvest)", ""],
        ["   • AFTER: Field continues BROWN (post-harvest or dormant)", ""],
        ["", ""],
        ["How to Enter Your Assessment:", ""],
        ["Enter one of the following in your evaluator column for each field:", ""],
        ["   • YES = Predicted date is CORRECT (brown/harvest-ready at center date)", ""],
        ["   • NO = Predicted date is INCORRECT (not ready or already harvested)", ""],
        ["   • ? or MAYBE = Uncertain (cloudy images, unclear field status)", ""],
        ["", ""],
        ["Workflow Options:", ""],
        ["Option A (Divide Work): Assign 2-3 fields per evaluator (rows divided by column)", ""],
        ["Option B (Full Review): Each evaluator reviews all fields (everyone fills all rows)", ""],
        ["Option C (Spot Check): Each evaluator samples 5-10 random fields", ""],
        ["", ""],
        ["Image Location:", ""],
        ["All RGB images are in: /laravel_app/storage/app/angata/RGB/", ""],
        # FIX: the actual filenames embed the date as YYYYMMDD (8 digits),
        # not YYYY-MM-DD — see the parsing regex in parse_rgb_filenames.
        ["Format: field_{FIELD_ID}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png", ""],
        ["", ""],
        ["Notes:", ""],
        ["• Cloud cover may obscure ground truth - use best judgment", ""],
        ["• Fields with multiple bands or irregular shapes: focus on dominant area", ""],
        ["• Use context from previous/next dates to validate your assessment", ""],
    ]

    # Write the text; bold the title and the section headings.
    for row_idx, row_data in enumerate(instr_content, start=1):
        for col_idx, value in enumerate(row_data, start=1):
            cell = ws_instr.cell(row=row_idx, column=col_idx, value=value)
            if row_idx == 1:
                cell.font = Font(bold=True, size=14)
            elif any(keyword in str(value) for keyword in ["Overview:", "Instructions", "Workflow", "Image Location", "Notes"]):
                cell.font = Font(bold=True, size=11)
            cell.alignment = Alignment(horizontal="left", vertical="top", wrap_text=True)

    ws_instr.column_dimensions['A'].width = 50
    ws_instr.column_dimensions['B'].width = 80


def create_evaluation_template():
    """Create the Excel evaluation template.

    Builds a workbook with two sheets:
      * "Evaluation"   - one row per RGB image, one response column per
                         evaluator in EVALUATORS.
      * "Instructions" - how to review the grids and fill in the form.

    Reads filenames from RGB_DIR and writes the workbook to OUTPUT_PATH.
    Returns None; prints progress to stdout.
    """
    print("Loading field data from RGB images...")
    fields_data = parse_rgb_filenames()
    if not fields_data:
        print("ERROR: No RGB images found in", RGB_DIR)
        return
    print(f"Found {len(fields_data)} RGB images")

    wb = Workbook()
    ws_eval = wb.active
    ws_eval.title = "Evaluation"
    _write_evaluation_sheet(ws_eval, fields_data)
    _write_instructions_sheet(wb.create_sheet("Instructions"))

    # Save workbook
    wb.save(OUTPUT_PATH)
    print(f"✓ Evaluation template created: {OUTPUT_PATH}")
    # FIX: added the ✓ marker for consistency with the other summary lines.
    print(f"✓ {len(fields_data)} fields added to evaluation form")
    print(f"✓ Evaluators: {', '.join(EVALUATORS)}")
# Entry point: build the template when run as a standalone script.
if __name__ == "__main__":
    create_evaluation_template()

View file

@ -1,107 +0,0 @@
#!/usr/bin/env python
"""
Debug script to find all tiles for a date and check which overlap with field boundary.

Loads the boundary polygon for one hard-coded field (field 79) from the
project GeoJSON, scans the merged 5x5 TIFF directory for tiles captured on a
hard-coded target date, and reports per tile: its bounds, whether its bounding
box intersects the field, and whether rasterio can mask the field out of it
(with per-band non-zero pixel counts).
"""
import json
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np
import shapely.geometry as shgeom
import pandas as pd

# --- Load field 79 boundary from the project GeoJSON ------------------------
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
field_id = "79"

print(f"Loading field {field_id} from GeoJSON...")
with open(geojson_path) as f:
    geojson_data = json.load(f)

field_boundary = None
for feature in geojson_data.get('features', []):
    props = feature.get('properties', {})
    if str(props.get('field', '')) == str(field_id):
        geometry = feature.get('geometry')
        if geometry:
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if geom_type == 'MultiPolygon':
                # NOTE: only the exterior ring of the FIRST polygon is used;
                # holes and additional polygons are ignored for this debug check.
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0][0]
                    field_boundary = shgeom.Polygon(coords)
            elif geom_type == 'Polygon':
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0]
                    field_boundary = shgeom.Polygon(coords)
        break

if field_boundary is None:
    print(f"Field {field_id} not found")
    exit(1)

print(f"Field boundary bounds: {field_boundary.bounds}")
print(f"Field boundary area: {field_boundary.area}")

# --- Find date directories matching the target date -------------------------
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
target_date = pd.Timestamp("2026-01-15")  # Use a recent date that exists

date_dirs = []
for date_dir in tiff_dir.iterdir():
    if date_dir.is_dir():
        try:
            # Directory names look like YYYY-MM-DD or YYYY-MM-DD_HH;
            # the part before the first underscore is the date.
            dir_name = date_dir.name
            date_str = dir_name.split('_')[0]
            tile_date = pd.Timestamp(date_str)
            if tile_date == target_date:
                date_dirs.append(date_dir)
        except (ValueError, TypeError):
            # FIX: was a bare `except: pass`, which also swallowed
            # KeyboardInterrupt/SystemExit. Only unparsable directory
            # names should be skipped.
            pass

if not date_dirs:
    print(f"No tiles found for {target_date}")
    exit(1)

print(f"\nFound {len(date_dirs)} date directory(ies) for {target_date}")

# --- Check each tile in each matching directory -----------------------------
for date_dir in date_dirs:
    print(f"\n=== Checking date directory: {date_dir.name} ===")
    tiles = list(date_dir.glob("*.tif"))
    print(f"Found {len(tiles)} tiles in this directory")
    for tile_path in sorted(tiles):
        try:
            with rasterio.open(tile_path) as src:
                tile_bounds = src.bounds
                tile_geom = shgeom.box(*tile_bounds)
                intersects = field_boundary.intersects(tile_geom)
                intersection = field_boundary.intersection(tile_geom) if intersects else None
                intersection_area = intersection.area if intersection else 0
                print(f"\n{tile_path.name}")
                print(f" Tile bounds: {tile_bounds}")
                print(f" Intersects field: {intersects}")
                if intersects:
                    print(f" Intersection area: {intersection_area:.8f}")
                    # Try to mask this tile with the field polygon (RGB bands 1-3).
                    geom = shgeom.mapping(field_boundary)
                    try:
                        masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
                        print(f" ✓ Successfully masked! Shape: {masked_data.shape}")
                        # Check the data in each band
                        for i, band_idx in enumerate([1, 2, 3]):
                            band_data = masked_data[i]
                            non_zero = (band_data != 0).sum()
                            print(f" Band {band_idx}: {non_zero} non-zero pixels out of {band_data.size}")
                    except ValueError as e:
                        print(f" ✗ Masking failed: {e}")
        except Exception as e:
            # Broad on purpose: a corrupt tile should not abort the debug sweep.
            print(f" Error reading tile: {e}")

View file

@ -1,102 +0,0 @@
#!/usr/bin/env python
"""
Debug script to diagnose why field boundary masking produces no data.

Loads the boundary of one hard-coded field (79), picks the first sufficiently
large TIFF tile found under the merged 5x5 directory, then checks whether the
field intersects the tile and what rasterio's mask() actually returns for it
(per-band min/max/mean, valid-pixel and NaN percentages, sample values).
"""
import json
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np
import shapely.geometry as shgeom

# Load a sample field boundary
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
field_id = "79"  # A field that had issues

print(f"Loading field {field_id} from GeoJSON...")
with open(geojson_path) as f:
    geojson_data = json.load(f)

field_boundary = None
for feature in geojson_data.get('features', []):
    props = feature.get('properties', {})
    if str(props.get('field', '')) == str(field_id):
        geometry = feature.get('geometry')
        if geometry:
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if geom_type == 'MultiPolygon':
                # NOTE: only the exterior ring of the FIRST polygon is used.
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0][0]
                    field_boundary = shgeom.Polygon(coords)
            elif geom_type == 'Polygon':
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0]
                    field_boundary = shgeom.Polygon(coords)
        break

if field_boundary is None:
    print(f"Field {field_id} not found")
    exit(1)

print(f"Field boundary bounds: {field_boundary.bounds}")
print(f"Field boundary area: {field_boundary.area}")

# Load a sample TIFF tile
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
tile_file = None
for date_dir in sorted(tiff_dir.iterdir()):
    if date_dir.is_dir():
        for tif in date_dir.glob("*.tif"):
            # Files below ~12 MB are treated as empty/cloud-masked and skipped
            # (presumably matching the pipeline's empty-tile size — confirm).
            if tif.stat().st_size > 12e6:
                tile_file = tif
                break
    if tile_file:
        break

if not tile_file:
    print("No suitable TIFF found")
    exit(1)

print(f"\nTesting with TIFF: {tile_file.name}")
with rasterio.open(tile_file) as src:
    print(f"TIFF Bounds: {src.bounds}")
    print(f"TIFF CRS: {src.crs}")
    # Check if field boundary is within tile bounds
    tile_box = shgeom.box(*src.bounds)
    intersects = field_boundary.intersects(tile_box)
    print(f"Field boundary intersects tile: {intersects}")
    if intersects:
        intersection = field_boundary.intersection(tile_box)
        print(f"Intersection area: {intersection.area}")
        print(f"Intersection bounds: {intersection.bounds}")
    # Try to mask and see what we get
    print("\nAttempting to mask...")
    geom = shgeom.mapping(field_boundary)
    try:
        # crop=True trims the output raster to the polygon's bounding box;
        # bands 1-3 are read (RGB in the merged tiles).
        masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
        print(f"Masked data shape: {masked_data.shape}")
        print(f"Masked data dtype: {masked_data.dtype}")
        # Check the data
        for i, band_idx in enumerate([1, 2, 3]):
            band_data = masked_data[i]
            print(f"\nBand {band_idx}:")
            print(f" min: {np.nanmin(band_data):.6f}")
            print(f" max: {np.nanmax(band_data):.6f}")
            print(f" mean: {np.nanmean(band_data):.6f}")
            print(f" % valid (non-zero): {(band_data != 0).sum() / band_data.size * 100:.2f}%")
            print(f" % NaN: {np.isnan(band_data).sum() / band_data.size * 100:.2f}%")
            # Show sample values
            valid_pixels = band_data[band_data != 0]
            if len(valid_pixels) > 0:
                print(f" Sample valid values: {valid_pixels[:10]}")
    except ValueError as e:
        # rasterio raises ValueError when the geometry does not overlap the raster.
        print(f"Error during masking: {e}")

View file

@ -1,47 +0,0 @@
#!/usr/bin/env python
"""
Debug script to inspect TIFF file structure and data.

Picks the first sufficiently large tile under the merged 5x5 directory and
prints its metadata (band count, dtype, shape, CRS, bounds) plus basic
statistics for up to the first five bands.
"""
import rasterio
from pathlib import Path
import numpy as np

# Pick a tile file to inspect
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")

# Find first available tile
tile_file = None
for date_dir in sorted(tiff_dir.iterdir()):
    if date_dir.is_dir():
        for tif in date_dir.glob("*.tif"):
            if tif.stat().st_size > 12e6:  # Skip empty files
                tile_file = tif
                break
    if tile_file:
        break

if not tile_file:
    print("No suitable TIFF files found")
    exit(1)

print(f"Inspecting: {tile_file.name}")
print("=" * 80)
with rasterio.open(tile_file) as src:
    print(f"Band count: {src.count}")
    print(f"Data type: {src.dtypes[0]}")
    print(f"Shape: {src.height} x {src.width}")
    print(f"CRS: {src.crs}")
    print(f"Bounds: {src.bounds}")
    print()
    # Read each band (at most the first 5, fewer if the file has less)
    for band_idx in range(1, min(6, src.count + 1)):
        data = src.read(band_idx)
        print(f"Band {band_idx}:")
        print(f" dtype: {data.dtype}")
        print(f" range: {data.min():.6f} - {data.max():.6f}")
        print(f" mean: {data.mean():.6f}")
        print(f" % valid (non-zero): {(data != 0).sum() / data.size * 100:.1f}%")
        print()

View file

@ -4,7 +4,7 @@ Purpose: Download Planet satellite data for missing dates only (skip existing fi
Can be called from batch scripts or other Python scripts. Can be called from batch scripts or other Python scripts.
Usage: Usage:
python download_planet_missing_dates.py --start 2022-01-01 --end 2025-12-15 --project angata python download_planet_missing_dates.py --start 2026-01-17 --end 2026-12-20 --project angata
python download_planet_missing_dates.py --start 2023-06-01 --end 2023-06-30 --project angata --dry-run python download_planet_missing_dates.py --start 2023-06-01 --end 2023-06-30 --project angata --dry-run
Environment variables (alternative to CLI args): Environment variables (alternative to CLI args):

View file

@ -1,29 +0,0 @@
# Merge all CI RDS files into a single CSV
#
# Reads every .rds file under ci_data_dir, tags each row with the client name
# (derived from the file name), concatenates everything, and writes one CSV
# for the Python pipeline to consume.
library(tidyverse)

# Paths
ci_data_dir <- "r_app/experiments/ci_graph_exploration/CI_data"
output_csv <- "python_app/lstm_ci_data_combined.csv"

# Find all RDS files
rds_files <- list.files(ci_data_dir, pattern = "\\.rds$", full.names = TRUE)
print(paste("Found", length(rds_files), "RDS files"))

# Load one RDS file and tag its rows with the client name (file name sans extension).
load_client_rds <- function(file) {
  filename <- basename(file)
  client_name <- sub("\\.rds$", "", filename)  # Extract client name from filename
  print(paste("Loading:", filename, "- Client:", client_name))
  data <- readRDS(file)
  data$client <- client_name
  data
}

# FIX: bind all frames in a single call instead of growing the accumulator
# with bind_rows inside the loop, which copies the tibble on every iteration
# (quadratic in total rows).
combined_data <- bind_rows(lapply(rds_files, load_client_rds))

print(paste("Total rows:", nrow(combined_data)))
print(paste("Columns:", paste(names(combined_data), collapse = ", ")))

# Write to CSV
write.csv(combined_data, output_csv, row.names = FALSE)
print(paste("✓ Saved to:", output_csv))

View file

@ -88,95 +88,116 @@ def load_field_boundaries(geojson_path, field_id):
return None, None return None, None
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60): def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=None, debug=False):
""" """
Find all tile files for target_date (or closest date) that overlap with field_boundary. Find tile files with actual data (not cloud-masked) for target_date or nearest date.
Tile files are organized in subdirectories by date: 5x5/YYYY-MM-DD_HH/*.tif Searches by increasing distance from target date until finding tiles with data.
Avoids reusing dates in exclude_dates to ensure temporal diversity in grids.
Args: Args:
target_date (pd.Timestamp): Target date to find tiles near target_date (pd.Timestamp): Target date to find tiles near
tiff_dir (Path): Directory containing 5x5 date subdirectories tiff_dir (Path): Directory containing 5x5 date subdirectories
field_boundary (shapely.Polygon): Field boundary for overlap detection field_boundary (shapely.Polygon): Field boundary for overlap detection
days_window (int): Max days to search before/after target days_window (int): Max days to search before/after target
exclude_dates (list): List of dates to skip (avoid repetition)
debug (bool): Enable detailed debugging output
Returns: Returns:
tuple: (list of tile paths, actual_date, days_diff) tuple: (list of tile paths, actual_date, days_diff)
list: tile paths that overlap field
pd.Timestamp: actual date of tiles found
int: days difference from target to actual date found
""" """
target_date = pd.Timestamp(target_date) target_date = pd.Timestamp(target_date)
tiff_dir = Path(tiff_dir) tiff_dir = Path(tiff_dir)
exclude_dates = exclude_dates or []
exclude_dates = [pd.Timestamp(d) for d in exclude_dates]
if not tiff_dir.exists(): if not tiff_dir.exists():
if debug:
print(f" [DEBUG] TIFF dir does not exist: {tiff_dir}")
return [], None, None return [], None, None
# Find all date subdirectories # Build map of all available dates
available_dates = {} # {date: ([tile file paths], actual_dir_name)} available_dates = {}
min_size_mb = 12.0 # Empty files are ~11.56 MB date_parse_errors = 0
for date_dir in tiff_dir.iterdir(): for date_dir in tiff_dir.iterdir():
if not date_dir.is_dir(): if not date_dir.is_dir():
continue continue
try: try:
# Parse date from directory name (YYYY-MM-DD or YYYY-MM-DD_HH)
dir_name = date_dir.name dir_name = date_dir.name
# Extract just the date part before underscore if it exists
date_str = dir_name.split('_')[0] date_str = dir_name.split('_')[0]
tile_date = pd.Timestamp(date_str) tile_date = pd.Timestamp(date_str)
days_diff = (tile_date - target_date).days
if abs(days_diff) > days_window:
continue
# Find all .tif files in this date directory
tile_files = [] tile_files = []
for tile_file in date_dir.glob('*.tif'): for tile_file in date_dir.glob('*.tif'):
# Skip obviously empty files # Include ALL tiles, regardless of size
file_size_mb = tile_file.stat().st_size / (1024 * 1024) # Some tiles may be small but still contain valid data for specific fields
if file_size_mb >= min_size_mb: tile_files.append(tile_file)
tile_files.append(tile_file)
if tile_files: if tile_files:
available_dates[tile_date] = (tile_files, dir_name) available_dates[tile_date] = (tile_files, dir_name)
except: except Exception as e:
pass date_parse_errors += 1
if debug:
print(f" [DEBUG] Failed to parse date from {date_dir.name}: {e}")
if debug:
print(f" [DEBUG] Found {len(available_dates)} dates with tile files, {date_parse_errors} parse errors")
print(f" [DEBUG] Date range: {min(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'} to {max(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'}")
if not available_dates: if not available_dates:
return [], None, None return [], None, None
# Find closest date # Search dates by increasing distance from target, looking for tiles with actual data
closest_date = min(available_dates.keys(), key=lambda d: abs((d - target_date).days)) sorted_dates = sorted(available_dates.keys(), key=lambda d: abs((d - target_date).days))
days_diff = (closest_date - target_date).days
tiles, _ = available_dates[closest_date]
# Filter tiles to only those that overlap field boundary for search_date in sorted_dates:
if rasterio is None or field_boundary is None: # Skip if this date was recently used (avoid temporal repetition)
# If rasterio not available, use all tiles (conservative approach) if search_date in exclude_dates:
return tiles, closest_date, days_diff continue
overlapping_tiles = [] tiles, dir_name = available_dates[search_date]
days_diff = (search_date - target_date).days
for tile_path in tiles: # Try to find overlapping tiles at this date
try: overlapping_tiles = []
with rasterio.open(tile_path) as src: tile_check_errors = 0
# Get tile bounds
tile_bounds = src.bounds # (left, bottom, right, top)
tile_geom = shgeom.box(*tile_bounds)
# Check if tile overlaps field for tile_path in tiles:
if tile_geom.intersects(field_boundary): try:
overlapping_tiles.append(tile_path) with rasterio.open(tile_path) as src:
except: tile_bounds = src.bounds
pass tile_geom = shgeom.box(*tile_bounds)
if not overlapping_tiles: # Debug first tile
# No overlapping tiles found, return all tiles for the closest date if debug and len(overlapping_tiles) == 0 and tile_check_errors == 0:
return tiles, closest_date, days_diff print(f" [DEBUG] First tile check for {tile_path.name}:")
print(f" Tile bounds: {tile_bounds}")
print(f" Tile CRS: {src.crs}")
print(f" Field bounds: {field_boundary.bounds}")
print(f" Field geom type: {field_boundary.geom_type}")
print(f" Intersects: {tile_geom.intersects(field_boundary)}")
return overlapping_tiles, closest_date, days_diff if tile_geom.intersects(field_boundary):
overlapping_tiles.append(tile_path)
except Exception as e:
tile_check_errors += 1
if debug:
print(f" [DEBUG] Error checking tile {tile_path.name}: {e}")
if debug:
print(f" [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, {len(overlapping_tiles)} overlap field, {tile_check_errors} errors")
if overlapping_tiles:
# Found overlapping tiles, return them
print(f" [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), Tiles: {[Path(t).name for t in overlapping_tiles]}")
return overlapping_tiles, search_date, days_diff
# No overlapping tiles found at all
if debug:
print(f" [DEBUG] No overlapping tiles found for {target_date.strftime('%Y-%m-%d')} within {len(sorted_dates)} searched dates")
return [], None, None
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)): def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
@ -211,44 +232,44 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
# For merged_final_tif: bands 1,2,3 are R,G,B # For merged_final_tif: bands 1,2,3 are R,G,B
bands_to_read = (1, 2, 3) bands_to_read = (1, 2, 3)
# Mask and read bands # Mask and read bands - extract ONLY the specific field polygon
geom = shgeom.mapping(field_boundary) geom = shgeom.mapping(field_boundary)
try: try:
masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read)) masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
# Stack RGB
rgb = np.stack([masked_data[i] for i in range(3)], axis=-1) rgb = np.stack([masked_data[i] for i in range(3)], axis=-1)
except (ValueError, RuntimeError) as e:
# Convert to float32 if not already # Mask failed - field doesn't overlap this tile or geometry issue
rgb = rgb.astype(np.float32) print(f" MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}")
# Normalize to 0-1 range
# Data appears to be 8-bit (0-255 range) stored as float32
# Check actual max value to determine normalization
max_val = np.nanmax(rgb)
if max_val > 0:
# If max is around 255 or less, assume 8-bit
if max_val <= 255:
rgb = rgb / 255.0
# If max is around 65535, assume 16-bit
elif max_val <= 65535:
rgb = rgb / 65535.0
# Otherwise divide by max to normalize
else:
rgb = rgb / max_val
rgb = np.clip(rgb, 0, 1)
# Check if result is all NaN
if np.all(np.isnan(rgb)):
return None
# Replace any remaining NaN with 0 (cloud/invalid pixels)
rgb = np.nan_to_num(rgb, nan=0.0)
return rgb
except ValueError:
return None return None
# Convert to float32 if not already
rgb = rgb.astype(np.float32)
# Normalize to 0-1 range
# Data appears to be 8-bit (0-255 range) stored as float32
# Check actual max value to determine normalization
max_val = np.nanmax(rgb)
if max_val > 0:
# If max is around 255 or less, assume 8-bit
if max_val <= 255:
rgb = rgb / 255.0
# If max is around 65535, assume 16-bit
elif max_val <= 65535:
rgb = rgb / 65535.0
# Otherwise divide by max to normalize
else:
rgb = rgb / max_val
rgb = np.clip(rgb, 0, 1)
# Check if result is all NaN
if np.all(np.isnan(rgb)):
return None
# Replace any remaining NaN with 0 (cloud/invalid pixels)
rgb = np.nan_to_num(rgb, nan=0.0)
return rgb
except Exception as e: except Exception as e:
return None return None
@ -284,9 +305,16 @@ def load_and_composite_tiles_rgb(tile_paths, field_boundary):
if len(rgb_arrays) == 1: if len(rgb_arrays) == 1:
composited = rgb_arrays[0] composited = rgb_arrays[0]
else: else:
# If multiple tiles, use max composite # If multiple tiles, need to handle different shapes
stacked = np.stack(rgb_arrays, axis=0) # Find common shape or use max/min approach that handles variable sizes
composited = np.max(stacked, axis=0) try:
# Try to stack if same shape
stacked = np.stack(rgb_arrays, axis=0)
composited = np.max(stacked, axis=0)
except ValueError:
# Different shapes - use the largest (most complete) tile
# This happens when tiles are masked to different field areas
composited = max(rgb_arrays, key=lambda x: x.size)
composited = composited.astype(np.float32) composited = composited.astype(np.float32)
@ -307,6 +335,26 @@ def load_and_composite_tiles_rgb(tile_paths, field_boundary):
return None return None
def has_valid_rgb_data(rgb_data, threshold=0.05):
    """
    Check whether an RGB image array contains usable data (not black/empty).

    Args:
        rgb_data: numpy array of normalized RGB values in [0, 1], or None.
        threshold: minimum maximum pixel value required to count as data.

    Returns:
        bool: True if the image's max value exceeds ``threshold`` AND there
        is more than 0.01 of variation between min and max. False for None,
        empty, all-black, flat (no variation), or all-NaN input.
    """
    if rgb_data is None:
        return False
    try:
        data_max = np.nanmax(rgb_data)
        data_min = np.nanmin(rgb_data)
        # All-NaN input yields nan here; nan comparisons evaluate False,
        # so an all-NaN image is correctly reported as having no data.
        return bool(data_max > threshold and (data_max - data_min) > 0.01)
    except (ValueError, TypeError):
        # ValueError: zero-size array; TypeError: non-numeric input.
        # Narrowed from a bare `except:` so real bugs and interrupts propagate.
        return False
def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary, def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary,
title, output_dir, harvest_type='registered', model_name=None, harvest_index=None): title, output_dir, harvest_type='registered', model_name=None, harvest_index=None):
""" """
@ -334,91 +382,263 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field
""" """
harvest_date = pd.Timestamp(harvest_date) harvest_date = pd.Timestamp(harvest_date)
# Target dates: 15 images at 7-day intervals (8 pre, 1 near, 6 post) # Pre-allocate 15 image slots
target_dates = [ rgb_images = [None] * 15
harvest_date - timedelta(days=56), # T-56d days_offsets = [None] * 15
harvest_date - timedelta(days=49), # T-49d actual_dates = [None] * 15
harvest_date - timedelta(days=42), # T-42d used_dates = set() # Use set for efficient lookups
harvest_date - timedelta(days=35), # T-35d
harvest_date - timedelta(days=28), # T-28d
harvest_date - timedelta(days=21), # T-21d
harvest_date - timedelta(days=14), # T-14d
harvest_date - timedelta(days=7), # T-7d
harvest_date, # T~0d (near harvest)
harvest_date + timedelta(days=7), # T+7d
harvest_date + timedelta(days=14), # T+14d
harvest_date + timedelta(days=21), # T+21d
harvest_date + timedelta(days=28), # T+28d
harvest_date + timedelta(days=35), # T+35d
harvest_date + timedelta(days=42), # T+42d
harvest_date + timedelta(days=56), # T+56d (Note: non-standard to fill 5th col in row 3)
]
# Find TIFFs for each date # STEP 0: Debug - List all available dates
rgb_images = [] print(f" [STEP 0] Checking available TIFF dates in {tiff_dir}...")
days_offsets = [] available_dates = []
actual_dates = [] # Store actual dates of TIFFs found if tiff_dir.exists():
for date_folder in sorted(tiff_dir.iterdir()):
if date_folder.is_dir():
try:
date_obj = datetime.strptime(date_folder.name, '%Y-%m-%d').date()
available_dates.append(date_obj)
except:
pass
print(f" Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)")
for target in target_dates: # STEP 1: Find anchor image (closest to predicted harvest date) FIRST
tile_paths, actual_date, days_diff = find_overlapping_tiles(target, tiff_dir, field_boundary, days_window=60) # Search within ±14 days of predicted harvest date first, then expand if needed
print(f" [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...")
anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
harvest_date, tiff_dir, field_boundary, days_window=14, exclude_dates=[], debug=False
)
if not tile_paths or actual_date is None: anchor_rgb = None
rgb_images.append(None) anchor_idx = 8 # Position 8 is the center (T~0d / harvest date position)
days_offsets.append(None) failed_anchor_dates = [] # Track dates that failed validation
actual_dates.append(None)
print(f" ⚠ No tiles found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data")
continue
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary) if anchor_tile_paths and anchor_date:
rgb_images.append(rgb) anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
days_offsets.append(days_diff) if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
actual_dates.append(actual_date) rgb_images[anchor_idx] = anchor_rgb
days_offsets[anchor_idx] = 0 # Anchor is reference point
if rgb is not None: actual_dates[anchor_idx] = anchor_date
print(f" ✓ Loaded {len(tile_paths)} tile(s) for {actual_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from target)") used_dates.add(anchor_date)
print(f" ✓ ANCHOR FOUND (±14d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
else: else:
print(f" ⚠ Loaded {len(tile_paths)} tile(s) but RGB data is None") failed_anchor_dates.append(anchor_date)
print(f" ⚠ Found date {anchor_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data")
print(f" [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...")
anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=set(failed_anchor_dates), debug=False
)
if anchor_tile_paths and anchor_date:
anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
rgb_images[anchor_idx] = anchor_rgb
days_offsets[anchor_idx] = 0 # Anchor is reference point
actual_dates[anchor_idx] = anchor_date
used_dates.add(anchor_date)
print(f" ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
else:
failed_anchor_dates.append(anchor_date)
print(f" ✗ No valid anchor found even within ±60 days")
else:
print(f" ✗ No tiles found for any date within ±60 days")
else:
print(f" ⚠ No tiles found within ±14 days, expanding search...")
anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=[], debug=False
)
if anchor_tile_paths and anchor_date:
anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
rgb_images[anchor_idx] = anchor_rgb
days_offsets[anchor_idx] = 0 # Anchor is reference point
actual_dates[anchor_idx] = anchor_date
used_dates.add(anchor_date)
print(f" ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
else:
print(f" ✗ No valid anchor found even within ±60 days")
else:
print(f" ✗ No tiles found for any date within ±60 days")
# STEP 2: Dynamically collect images BEFORE anchor date
# Strategy: Go backwards from anchor with progressively larger search windows
# Start at 7 days, then try 10, 15, 20, 30+ days apart
print(f" [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...")
before_positions = [7, 6, 5, 4, 3, 2, 1, 0] # Will fill in reverse order (7→0)
before_images = [] # (position, date, rgb, offset)
pos_idx = 0 # Index into before_positions
last_found_date = anchor_date
# Progressive search offsets: try these day offsets in order
search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120] # Days before last found image
while pos_idx < len(before_positions) and last_found_date.year >= 2024:
found_this_iteration = False
# Try each offset until we find a valid image
for days_offset in search_offsets:
search_target_date = last_found_date - timedelta(days=days_offset)
tile_paths, actual_date, days_diff = find_overlapping_tiles(
search_target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=used_dates, debug=False
)
if tile_paths and actual_date:
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
if rgb is not None and has_valid_rgb_data(rgb):
# Found valid image!
overall_max = np.nanmax(rgb)
overall_min = np.nanmin(rgb)
offset_from_anchor = (actual_date - anchor_date).days
before_images.append((before_positions[pos_idx], actual_date, rgb, offset_from_anchor))
used_dates.add(actual_date)
last_found_date = actual_date # Move backwards from this date
print(f" ✓ Before[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}")
pos_idx += 1
found_this_iteration = True
break # Found one, stop trying larger offsets
# If nothing found with any offset, we're done collecting before images
if not found_this_iteration:
break
# Store collected before images
for pos, actual_date, rgb, offset in before_images:
rgb_images[pos] = rgb
actual_dates[pos] = actual_date
days_offsets[pos] = offset
# STEP 3: Dynamically collect images AFTER anchor date
# Strategy: Go forwards from anchor with progressively larger search windows
# Start at 7 days, then try 10, 15, 20, 30+ days apart
print(f" [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...")
after_positions = [9, 10, 11, 12, 13, 14] # Will fill in order (9→14)
after_images = [] # (position, date, rgb, offset)
pos_idx = 0 # Index into after_positions
last_found_date = anchor_date
max_search_date = anchor_date + timedelta(days=200) # Don't search beyond 200 days forward
# Progressive search offsets: try these day offsets in order
search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120] # Days after last found image
while pos_idx < len(after_positions) and last_found_date < max_search_date:
found_this_iteration = False
# Try each offset until we find a valid image
for days_offset in search_offsets:
search_target_date = last_found_date + timedelta(days=days_offset)
# Don't search beyond max date
if search_target_date > max_search_date:
break
tile_paths, actual_date, days_diff = find_overlapping_tiles(
search_target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=used_dates, debug=False
)
if tile_paths and actual_date:
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
if rgb is not None and has_valid_rgb_data(rgb):
# Found valid image!
overall_max = np.nanmax(rgb)
overall_min = np.nanmin(rgb)
offset_from_anchor = (actual_date - anchor_date).days
after_images.append((after_positions[pos_idx], actual_date, rgb, offset_from_anchor))
used_dates.add(actual_date)
last_found_date = actual_date # Move forwards from this date
print(f" ✓ After[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}")
pos_idx += 1
found_this_iteration = True
break # Found one, stop trying larger offsets
# If nothing found with any offset, we're done collecting after images
if not found_this_iteration:
break
# Store collected after images
for pos, actual_date, rgb, offset in after_images:
rgb_images[pos] = rgb
actual_dates[pos] = actual_date
days_offsets[pos] = offset
# Create 5x3 grid plot (15 images) # Create 5x3 grid plot (15 images)
fig, axes = plt.subplots(3, 5, figsize=(25, 15)) fig, axes = plt.subplots(3, 5, figsize=(25, 15))
fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")}',
# Build title with anchor offset information
anchor_offset_from_harvest = (actual_dates[8] - harvest_date).days if actual_dates[8] is not None else None
if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0:
anchor_info = f"(Anchor: {actual_dates[8].strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)"
else:
anchor_info = f"(Exact match with anchor: {actual_dates[8].strftime('%Y-%m-%d')})" if actual_dates[8] is not None else ""
fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}',
fontsize=16, fontweight='bold') fontsize=16, fontweight='bold')
# Grid positions (5 columns, 3 rows = 15 images) # Grid positions (5 columns, 3 rows = 15 images)
positions = [ positions = [
('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4), ('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4),
('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('T~0d', 1, 3), ('T+7d', 1, 4), ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4),
('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4), ('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4),
] ]
for idx, (label, row, col) in enumerate(positions): # All 15 images for idx, (label, row, col) in enumerate(positions):
ax = axes[row, col] ax = axes[row, col]
if idx < len(rgb_images) and rgb_images[idx] is not None: if idx < len(rgb_images) and rgb_images[idx] is not None:
rgb_data = rgb_images[idx] rgb_data = rgb_images[idx]
# Debug: check data range # Debug: check data range for ALL bands
data_min, data_max = np.nanmin(rgb_data), np.nanmax(rgb_data) data_min = np.nanmin(rgb_data)
print(f" DEBUG: {label} RGB range: {data_min:.4f} - {data_max:.4f}, shape: {rgb_data.shape}") data_max = np.nanmax(rgb_data)
data_mean = np.nanmean(rgb_data)
data_std = np.nanstd(rgb_data)
# Check per-band stats
r_min, r_max, r_mean = np.nanmin(rgb_data[:,:,0]), np.nanmax(rgb_data[:,:,0]), np.nanmean(rgb_data[:,:,0])
g_min, g_max, g_mean = np.nanmin(rgb_data[:,:,1]), np.nanmax(rgb_data[:,:,1]), np.nanmean(rgb_data[:,:,1])
b_min, b_max, b_mean = np.nanmin(rgb_data[:,:,2]), np.nanmax(rgb_data[:,:,2]), np.nanmean(rgb_data[:,:,2])
print(f" DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})")
print(f" R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})")
# Display with explicit vmin/vmax to handle normalized 0-1 data # Display with explicit vmin/vmax to handle normalized 0-1 data
ax.imshow(rgb_data, vmin=0, vmax=1) ax.imshow(rgb_data, vmin=0, vmax=1)
# Build title: label + offset + actual date # Build title: show BOTH anchor offset AND harvest offset
offset_str = f"{days_offsets[idx]:+d}d" if days_offsets[idx] is not None else "?" if days_offsets[idx] is not None:
date_str = actual_dates[idx].strftime('%Y-%m-%d') if actual_dates[idx] is not None else "No Date" offset_from_anchor = days_offsets[idx]
ax.set_title(f'{label}\n{offset_str}\n{date_str}', fontsize=10, fontweight='bold') offset_from_harvest = (actual_dates[idx] - harvest_date).days
# Add red box around harvest date (T~0d at row=1, col=3) if idx == 8: # ANCHOR/HARVEST position
if label == 'T~0d': if offset_from_harvest == 0:
offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})"
else:
offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)"
else:
# Show both offsets: from anchor and from harvest
offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}"
else:
offset_str = "No Data"
ax.set_title(offset_str, fontsize=9, fontweight='bold')
# Add red box around the ANCHOR IMAGE (position 8 is harvest/anchor)
if idx == 8: # Position 8 is the anchor
for spine in ax.spines.values(): for spine in ax.spines.values():
spine.set_edgecolor('red') spine.set_edgecolor('red')
spine.set_linewidth(4) spine.set_linewidth(4)
else: else:
ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray') ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray')
ax.set_title(label, fontsize=10) ax.set_title('No Data', fontsize=10)
print(f" DEBUG EMPTY {label}: No image data collected")
# Add red box for T~0d even if no data # Add red box for anchor position even if no data
if label == 'T~0d': if idx == 8: # Position 8 is the anchor
for spine in ax.spines.values(): for spine in ax.spines.values():
spine.set_edgecolor('red') spine.set_edgecolor('red')
spine.set_linewidth(4) spine.set_linewidth(4)

View file

@ -6,6 +6,8 @@
#' 2. Create master 5×5 grid covering all TIFFs #' 2. Create master 5×5 grid covering all TIFFs
#' 3. Split each daily TIFF into 25 tiles using the master grid #' 3. Split each daily TIFF into 25 tiles using the master grid
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif #' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
library(terra) library(terra)
library(sf) library(sf)
@ -32,7 +34,7 @@ if (length(args) >= 2) {
} }
PROJECT <- "angata" PROJECT <- "angata"
TIFF_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "merged_tif_8b") TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
# GRID SIZE CONFIGURATION - Change this to use different grid sizes # GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc. # Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
@ -42,10 +44,10 @@ GRID_NCOLS <- 5
# Construct grid-specific subfolder path # Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS) GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL) OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
# Load field boundaries for overlap checking # Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("..", "laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson") GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "") cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")

View file

@ -13,13 +13,13 @@
# #
# Examples: # Examples:
# # Angata 8-band data (with UDM cloud masking) # # Angata 8-band data (with UDM cloud masking)
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/02_ci_extraction.R 2026-01-02 7 angata merged_tif_8b # & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
# #
# # Aura 4-band data # # Aura 4-band data
# Rscript 02_ci_extraction.R 2025-11-26 7 aura merged_tif # Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif
# #
# # Auto-detects and uses tiles if available: # # Auto-detects and uses tiles if available:
# Rscript 02_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists) # Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
# 1. Load required packages # 1. Load required packages
# ----------------------- # -----------------------

View file

@ -8,7 +8,7 @@
# #
# Usage: Rscript interpolate_growth_model.R [project_dir] # Usage: Rscript interpolate_growth_model.R [project_dir]
# - project_dir: Project directory name (e.g., "chemba") # - project_dir: Project directory name (e.g., "chemba")
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/03_interpolate_growth_model.R angata # & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata
# 1. Load required packages # 1. Load required packages
# ----------------------- # -----------------------

View file

@ -186,8 +186,15 @@ main <- function() {
end_date <- if (length(args) >= 1 && !is.na(args[1])) { end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1]) as.Date(args[1])
} else if (exists("end_date", envir = .GlobalEnv)) { } else if (exists("end_date", envir = .GlobalEnv)) {
# For recursive calls, use the end_date that was set in the global environment global_date <- get("end_date", envir = .GlobalEnv)
get("end_date", envir = .GlobalEnv) # Check if it's a valid Date with length > 0
if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
global_date
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
} else if (exists("end_date_str", envir = .GlobalEnv)) { } else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv)) as.Date(get("end_date_str", envir = .GlobalEnv))
} else { } else {
@ -210,10 +217,15 @@ main <- function() {
7 7
} }
# Validate end_date is a proper Date object
if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
}
assign("project_dir", project_dir, envir = .GlobalEnv) assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv) assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
message("\n" %+% strrep("=", 70)) message("\n", strrep("=", 70))
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION") message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
message(strrep("=", 70)) message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d")) message("Date:", format(end_date, "%Y-%m-%d"))
@ -238,7 +250,7 @@ main <- function() {
# ========== PER-FIELD ANALYSIS (SC-64) ========== # ========== PER-FIELD ANALYSIS (SC-64) ==========
message("\n" %+% strrep("-", 70)) message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
message(strrep("-", 70)) message(strrep("-", 70))
@ -694,9 +706,9 @@ main <- function() {
# ========== FINAL SUMMARY ========== # ========== FINAL SUMMARY ==========
cat("\n" %+% strrep("=", 70) %+% "\n") cat("\n", strrep("=", 70), "\n")
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n") cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
cat(strrep("=", 70) %+% "\n") cat(strrep("=", 70), "\n")
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n") cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
cat("Excel export:", export_paths$excel, "\n") cat("Excel export:", export_paths$excel, "\n")
cat("RDS export:", export_paths$rds, "\n") cat("RDS export:", export_paths$rds, "\n")

View file

@ -1,31 +1,40 @@
# ============================================================================== # ==============================================================================
# FULL PIPELINE RUNNER # FULL PIPELINE RUNNER
# ============================================================================== # ==============================================================================
# Runs scripts 02, 03, 04, 09 (KPIs), 09 (Weekly), and 10 (CI Report Simple) # Mixed Python/R pipeline:
# 1. Python: Download Planet images
# 2. R 10: Create master grid and split TIFFs
# 3. R 20: CI Extraction
# 4. R 21: Convert CI RDS to CSV
# 5. R 30: Interpolate growth model
# 6. Python 31: Harvest imminent weekly
# 7. R 40: Mosaic creation
# 8. R 80: Calculate KPIs
# #
# ============================================================================== # ==============================================================================
# HOW TO RUN THIS SCRIPT # HOW TO RUN THIS SCRIPT
# ============================================================================== # ==============================================================================
# #
# In PowerShell or Command Prompt: # Run from the smartcane/ directory:
# #
# Option 1 (Recommended - shows real-time output): # Option 1 (Recommended - shows real-time output):
# Rscript run_full_pipeline.R # Rscript r_app/run_full_pipeline.R
# #
# Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces): # Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" run_full_pipeline.R # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
# #
# Option 3 (Batch mode - output saved to .Rout file): # Option 3 (Batch mode - output saved to .Rout file):
# R CMD BATCH --vanilla run_full_pipeline.R # R CMD BATCH --vanilla r_app/run_full_pipeline.R
# #
# ============================================================================== # ==============================================================================
# ==============================================================================
# *** EDIT THESE VARIABLES ***
# Run configuration: end_date is the last day of the processing window and
# offset days before it define the window start.
end_date <- as.Date("2026-01-27")  # or specify: as.Date("2026-01-27"), Sys.Date()
offset <- 7                        # days to look back
project_dir <- "angata"            # project name: "esa", "aura", "angata", "chemba"
# Angata uses the 8-band merged TIFFs; every other project uses the standard set.
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
force_rerun <- FALSE               # Set to TRUE to force all scripts to run even if outputs exist
# ***************************

# Format dates (end_date is already a Date, so no extra as.Date() round-trip needed)
end_date_str <- format(end_date, "%Y-%m-%d")

# Flipped to FALSE by any stage's error handler; later stages check it before running.
pipeline_success <- TRUE
# ==============================================================================
# INTELLIGENT CHECKING: What has already been completed?
# ==============================================================================
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")

# List a directory's matching entries, or an empty vector when the directory
# does not exist yet (first pipeline run).
.existing_outputs <- function(path, pattern = NULL, subdirs = FALSE) {
  if (!dir.exists(path)) {
    return(c())
  }
  if (subdirs) {
    list.dirs(path, full.names = FALSE, recursive = FALSE)
  } else {
    list.files(path, pattern = pattern)
  }
}

storage_root <- file.path("laravel_app", "storage", "app", project_dir)

# Script 10 outputs: one sub-directory per tiled date
tiles_dir <- file.path(storage_root, "daily_tiles_split", "5x5")
tiles_dates <- .existing_outputs(tiles_dir, subdirs = TRUE)
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))

# Script 20 outputs: daily CI extraction RDS files
ci_daily_dir <- file.path(storage_root, "Data", "extracted_ci", "daily_vals")
ci_files <- .existing_outputs(ci_daily_dir, pattern = "\\.rds$")
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))

# Script 21's CSV is overwritten on every run, so its presence is not a useful
# skip indicator; it simply follows Script 20.
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")

# Script 40 outputs: weekly max mosaics
mosaic_dir <- file.path(storage_root, "weekly_tile_max", "5x5")
mosaic_files <- .existing_outputs(mosaic_dir, pattern = "\\.tif$")
cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files)))

# Script 80 outputs: per-field KPI stats
kpi_dir <- file.path(storage_root, "reports", "kpis", "field_stats")
kpi_files <- .existing_outputs(kpi_dir, pattern = "\\.csv$|\\.json$")
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))

# Decide which stages can be skipped (force_rerun overrides everything)
skip_10 <- length(tiles_dates) > 0 && !force_rerun
skip_20 <- length(ci_files) > 0 && !force_rerun
skip_21 <- length(ci_files) > 0 && !force_rerun  # Skip 21 if 20 is skipped
skip_40 <- length(mosaic_files) > 0 && !force_rerun
skip_80 <- FALSE  # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks

cat("\nSkipping decisions:\n")
cat(sprintf("  Script 10: %s\n", if (skip_10) "SKIP (tiles exist)" else "RUN"))
cat(sprintf("  Script 20: %s\n", if (skip_20) "SKIP (CI exists)" else "RUN"))
cat(sprintf("  Script 21: %s\n", if (skip_21) "SKIP (CI exists)" else "RUN"))
cat(sprintf("  Script 40: %s\n", if (skip_40) "SKIP (mosaics exist)" else "RUN"))
cat(sprintf("  Script 80: %s\n", if (skip_80) "SKIP (KPIs exist)" else "RUN"))
# ==============================================================================
# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY)
# ==============================================================================
cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
# Remember the working directory up front so `finally` can always restore it,
# even if an error fires while we are still inside python_app/.
original_dir <- getwd()
tryCatch({
  # Setup paths
  base_path <- file.path("laravel_app", "storage", "app", project_dir)
  merged_tifs_dir <- file.path(base_path, data_source)

  # Dates already present as raw merged TIFFs.
  # NOTE(review): currently informational only - tiling (below) is treated as
  # the authoritative completion indicator.
  existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
  existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)

  # Dates already tiled by Script 10 (better indicator of completion)
  existing_tile_dates <- tiles_dates

  # Build the target date window [end_date - offset, end_date]
  start_date <- end_date - offset
  date_seq <- seq(start_date, end_date, by = "day")
  target_dates <- format(date_seq, "%Y-%m-%d")

  # Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
  missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]

  cat(sprintf("  Existing tiled dates: %d\n", length(existing_tile_dates)))
  cat(sprintf("  Missing dates in window: %d\n", length(missing_dates)))

  # Download each missing date via the Python downloader
  download_count <- 0
  download_failed <- 0
  if (length(missing_dates) > 0) {
    # Change to python_app/ so the downloader's relative paths resolve
    setwd("python_app")
    for (date_str in missing_dates) {
      cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str)
      result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)
      if (result == 0) {
        download_count <- download_count + 1
      } else {
        download_failed <- download_failed + 1
      }
    }
    setwd(original_dir)
  }

  cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed))
  if (download_failed > 0) {
    cat("⚠ Some downloads failed, but continuing pipeline\n")
  }

  # Force Script 10 to run ONLY if downloads actually succeeded (not just attempted)
  if (download_count > 0) {
    skip_10 <- FALSE
  }
}, error = function(e) {
  cat("✗ Error in planet download:", e$message, "\n")
  pipeline_success <<- FALSE
}, finally = {
  # FIX: guarantee the working directory is restored even when the download
  # stage aborted while inside python_app/ (original code left cwd changed on error).
  setwd(original_dir)
})
# ==============================================================================
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
# ==============================================================================
if (pipeline_success && !skip_10) {
  cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n")
  tryCatch({
    # Set environment variables for the script (Script 10 uses these for filtering)
    assign("PROJECT", project_dir, envir = .GlobalEnv)
    # Suppress verbose per-date output, show only summary
    sink(nullfile())
    source("r_app/10_create_master_grid_and_split_tiffs.R")
    sink()
    # Verify output
    tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
    if (dir.exists(tiles_dir)) {
      subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
      cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs)))
    } else {
      cat("✓ Script 10 completed\n")
    }
  }, error = function(e) {
    # FIX: only pop sinks that are actually open. A bare sink() here fails when
    # the error happened after the diversion was already closed (verification
    # phase), and would otherwise mask the real error message.
    while (sink.number() > 0) sink()
    cat("✗ Error in Script 10:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_10) {
  cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n")
}
# ==============================================================================
# SCRIPT 20: CI EXTRACTION
# ==============================================================================
if (pipeline_success && !skip_20) {
  cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n")
  tryCatch({
    # Export pipeline settings for the sourced script's main()
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)
    source("r_app/20_ci_extraction.R")
    main()  # execute with the exported settings
    # Confirm the daily CI RDS files were written
    ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
    if (dir.exists(ci_daily_dir)) {
      rds_out <- list.files(ci_daily_dir, pattern = "\\.rds$")
      cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(rds_out)))
    } else {
      cat("✓ Script 20 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 20:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_20) {
  cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) ==========\n")
}
# ==============================================================================
# SCRIPT 21: CONVERT CI RDS TO CSV
# ==============================================================================
if (pipeline_success && !skip_21) {
  cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
  tryCatch({
    # Export pipeline settings for the sourced script's main()
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    source("r_app/21_convert_ci_rds_to_csv.R")
    main()  # execute with the exported settings
    # Confirm the CSV output directory was populated
    ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir, "ci_extracted")
    if (dir.exists(ci_csv_path)) {
      csv_out <- list.files(ci_csv_path, pattern = "\\.csv$")
      cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_out)))
    } else {
      cat("✓ Script 21 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 21:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_21) {
  cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
}
# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success) {
  cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
  tryCatch({
    # Export pipeline settings for the sourced script's main()
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)
    source("r_app/30_interpolate_growth_model.R")
    main()  # execute with the exported settings
    # Confirm interpolated growth-model files exist
    growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
    if (dir.exists(growth_dir)) {
      growth_out <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
      cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(growth_out)))
    } else {
      cat("✓ Script 30 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 30:", e$message, "\n")
    pipeline_success <<- FALSE
  })
}
# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
if (pipeline_success) {
  cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n")
  tryCatch({
    # Run Python script in pytorch_gpu conda environment
    # Script expects positional project name (not --project flag)
    # Run from smartcane root so conda can find the environment
    cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir)
    cat("DEBUG: Running command:", cmd, "\n")
    result <- system(cmd)
    if (result == 0) {
      # Verify harvest output - check for THIS WEEK's specific file
      # NOTE(review): %V is the ISO-8601 week number, which pairs with the ISO
      # week-year %G, not %Y - around New Year these can disagree. Confirm
      # against the Python script's week/year naming before changing.
      current_week <- as.numeric(format(end_date, "%V"))
      current_year <- as.numeric(format(end_date, "%Y"))
      expected_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats",
                                 sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
      if (file.exists(expected_file)) {
        cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", current_week))
      } else {
        cat("✓ Script 31 completed (check if harvest.xlsx is available)\n")
      }
    } else {
      cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n")
    }
  }, error = function(e) {
    # FIX: removed stray setwd(original_dir) - this stage never changes the
    # working directory, and original_dir may be undefined here, which would
    # raise a second error inside the handler and hide the real one.
    cat("⚠ Script 31 error:", e$message, "\n")
  })
}
# ==============================================================================
# SCRIPT 40: MOSAIC CREATION
# ==============================================================================
if (pipeline_success && !skip_40) {
  cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
  tryCatch({
    # Export pipeline settings for the sourced script's main()
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)
    source("r_app/40_mosaic_creation.R")
    main()  # execute with the exported settings
    # Confirm the weekly mosaics were written
    mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
    if (dir.exists(mosaic_dir)) {
      mosaic_out <- list.files(mosaic_dir, pattern = "\\.tif$")
      cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", length(mosaic_out)))
    } else {
      cat("✓ Script 40 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 40:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_40) {
  cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n")
}
# ==============================================================================
# SCRIPT 80: CALCULATE KPIs
# ==============================================================================
if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week
  cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
  tryCatch({
    # Export pipeline settings (Script 80's main() uses these as fallbacks)
    # NOTE: end_date is already a Date, just assign directly without as.Date()
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("end_date_str", end_date_str, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)
    source("r_app/80_calculate_kpis.R")
    main()  # execute with the exported settings
    # Confirm the KPI output files exist
    kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
    if (dir.exists(kpi_dir)) {
      kpi_out <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
      cat(sprintf("✓ Script 80 completed - generated %d KPI files\n", length(kpi_out)))
    } else {
      cat("✓ Script 80 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 80:", e$message, "\n")
    cat("Full error:\n")
    print(e)
    pipeline_success <<- FALSE
  })
}
# ==============================================================================
# SUMMARY
# ==============================================================================
cat("\n========== PIPELINE COMPLETE ==========\n")
cat(sprintf("Project: %s\n", project_dir))
cat(sprintf("End Date: %s\n", end_date_str))
cat(sprintf("Offset: %d days\n", offset))
# Report the overall status flag set by the stage error handlers above
if (pipeline_success) {
  cat("Status: ✓ All scripts completed successfully\n")
} else {
  cat("Status: ✗ Pipeline failed - check errors above\n")
}
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")