diff --git a/python_app/31_harvest_imminent_weekly.py b/python_app/31_harvest_imminent_weekly.py index 8722eda..39d3eb9 100644 --- a/python_app/31_harvest_imminent_weekly.py +++ b/python_app/31_harvest_imminent_weekly.py @@ -1,4 +1,4 @@ -""" +r""" Script: 02_harvest_imminent_weekly.py Purpose: WEEKLY MONITORING - Run WEEKLY/DAILY to get real-time harvest status for all fields @@ -38,12 +38,12 @@ Use Cases: - Feed into 09b script for weekly dashboard reports Usage: - python 02_harvest_imminent_weekly.py [project_name] + python python_app/31_harvest_imminent_weekly.py angata Examples: - python 02_harvest_imminent_weekly.py angata - python 02_harvest_imminent_weekly.py esa - python 02_harvest_imminent_weekly.py chemba + python python_app/31_harvest_imminent_weekly.py angata + python python_app/31_harvest_imminent_weekly.py esa + python python_app/31_harvest_imminent_weekly.py chemba If no project specified, defaults to 'angata' """ @@ -264,7 +264,7 @@ def main(): # [3] Load model (from python_app directory) print("\n[3/5] Loading Model 307...") - model_dir = Path(".") # Current directory is python_app/, contains model.pt, config.json, scalers.pkl + model_dir = Path("python_app") # Model files located in python_app/ directory model, config, scalers = load_model_and_config(model_dir) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f" Device: {device}") diff --git a/python_app/batch_rgb_validation_top_fields.py b/python_app/batch_rgb_validation_top_fields_v3.py similarity index 69% rename from python_app/batch_rgb_validation_top_fields.py rename to python_app/batch_rgb_validation_top_fields_v3.py index cccb9ae..259a081 100644 --- a/python_app/batch_rgb_validation_top_fields.py +++ b/python_app/batch_rgb_validation_top_fields_v3.py @@ -1,23 +1,25 @@ #!/usr/bin/env python """ -Batch RGB Validation for Top 50 Largest Fields +Batch RGB Validation for Top 100 Largest Fields - V3 -Generates 5x3 RGB temporal grids for the latest complete harvest 
season of the 50 largest fields. +Same as v1 but with dynamic image selection (checks for actual data, skips empty/black images). + +Generates 5x3 RGB temporal grids for the latest complete harvest season of the 100 largest fields. Uses actual season_end dates from harvest.xlsx for visual validation of field conditions at harvest. Configuration: - GeoJSON: pivot.geojson (defines field boundaries and sizes) - Harvest data: harvest.xlsx (season_end dates for completed harvests) -- CI data: ci_data_for_python.csv - Output: RGB directory with field_name_YYYYMMDD_harvest_rgb.png Usage: - python batch_rgb_validation_top_fields.py + python batch_rgb_validation_top_fields_v3.py --field 1 + python batch_rgb_validation_top_fields_v3.py Output: - Saves 5x3 RGB grids to: laravel_app/storage/app/angata/RGB/ - Filenames: field___harvest_rgb.png - - Each grid shows 15 images at 7-day intervals around the season_end date + - Each grid shows 15 images around the harvest date (dynamic date selection, skips empty images) """ import json @@ -26,6 +28,7 @@ import pandas as pd from pathlib import Path from datetime import datetime, timedelta import sys +import argparse # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent)) @@ -189,16 +192,26 @@ def load_harvest_dates_from_xlsx(harvest_xlsx_path, top_50_fields_df): def main(): + parser = argparse.ArgumentParser(description='RGB validation of harvest dates using satellite imagery (v3 - dynamic)') + parser.add_argument('--field', type=str, default=None, help='Specific field ID to validate (e.g., "1" or "10022")') + parser.add_argument('--project', type=str, default='angata', help='Project name (default: angata)') + + args = parser.parse_args() + print("="*90) - print("BATCH RGB VALIDATION - TOP 50 LARGEST FIELDS") - print("Visual inspection of latest harvest dates from harvest.xlsx using RGB imagery") + if args.field: + print(f"RGB VALIDATION V3 - SINGLE FIELD: {args.field}") + else: + print("RGB 
VALIDATION V3 - TOP 50 LARGEST FIELDS") + print("Visual inspection of harvest dates from harvest.xlsx using RGB imagery (dynamic selection)") print("="*90) # Configuration - geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson") - harvest_xlsx = Path("laravel_app/storage/app/angata/Data/harvest.xlsx") - output_dir = Path("laravel_app/storage/app/angata/RGB") - tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5") + project = args.project + geojson_path = Path(f"laravel_app/storage/app/{project}/Data/pivot.geojson") + harvest_xlsx = Path(f"laravel_app/storage/app/{project}/Data/harvest.xlsx") + output_dir = Path(f"laravel_app/storage/app/{project}/RGB") + tiff_dir = Path(f"laravel_app/storage/app/{project}/merged_final_tif/5x5") # Verify paths if not geojson_path.exists(): @@ -213,18 +226,83 @@ def main(): output_dir.mkdir(parents=True, exist_ok=True) - # Step 1: Load GeoJSON and get top 50 largest fields - print("\n[1/4] Loading GeoJSON and identifying top 50 largest fields...") + # Handle single field mode + if args.field: + print(f"\n[1/3] Loading harvest data for field {args.field}...") + + harvest_df = pd.read_excel(harvest_xlsx) + harvest_df['season_end'] = pd.to_datetime(harvest_df['season_end'], errors='coerce') + harvest_df['field'] = harvest_df['field'].astype(str).str.strip() + + field_records = harvest_df[harvest_df['field'] == args.field] + field_records = field_records[field_records['season_end'].notna()] + + if len(field_records) == 0: + print(f"✗ No harvest data found for field {args.field}") + return + + # Get latest harvest for this field + latest_idx = field_records['season_end'].idxmax() + latest_row = field_records.loc[latest_idx] + harvest_date = latest_row['season_end'] + + print(f" ✓ Found harvest: {harvest_date.strftime('%Y-%m-%d')}") + + # Load field name from GeoJSON + print(f"\n[2/3] Loading field name from GeoJSON...") + with open(geojson_path) as f: + geojson_data = json.load(f) + + field_name = 
f"field_{args.field}" + for feature in geojson_data.get('features', []): + props = feature.get('properties', {}) + if str(props.get('field', '')) == args.field: + field_name = props.get('name', field_name) + break + + print(f" ✓ Field name: {field_name}") + + # Generate RGB grid + print(f"\n[3/3] Generating RGB validation grid (v3 dynamic)...") + results = generate_rgb_grids( + field_data=None, + field_id=args.field, + registered_harvest_dates=[], + predicted_harvest_dates=[ + { + 'harvest_date': harvest_date, + 'model_name': 'harvest_xlsx' + } + ], + output_dir=str(output_dir), + tiff_dir=str(tiff_dir), + geojson_path=str(geojson_path) + ) + + print("\n" + "="*90) + if results['predicted']: + print(f"✓ RGB grid generated successfully!") + print(f" Field: {field_name} (ID: {args.field})") + print(f" Harvest date: {harvest_date.strftime('%Y-%m-%d')}") + print(f" Output: {output_dir}") + else: + print(f"⚠ No RGB grid generated (no imagery available)") + print("="*90) + return + + # Batch mode for top 100 fields + print(f"\n[1/4] Loading GeoJSON and identifying top 100 largest fields...") + fields_df = load_geojson_and_calculate_areas(geojson_path) if fields_df is None: return - top_50_fields = fields_df.head(50) - print(f" ✓ Selected {len(top_50_fields)} largest fields for processing") + top_100_fields = fields_df.head(100) + print(f" ✓ Selected {len(top_100_fields)} largest fields for processing") # Step 2: Load harvest dates from Excel print("\n[2/4] Loading harvest dates from Excel (latest complete seasons)...") - harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_50_fields) + harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_100_fields) if len(harvest_dates) == 0: print("✗ No harvest dates found in Excel file") @@ -237,7 +315,7 @@ def main(): print(f" ... 
and {len(harvest_dates) - 5} more") # Step 3: Generate RGB grids for each field - print("\n[3/4] Generating RGB validation grids...") + print("\n[3/4] Generating RGB validation grids (v3 dynamic)...") rgb_count = 0 for idx, (field_id, harvest_info) in enumerate(harvest_dates.items(), 1): diff --git a/python_app/create_rgb_evaluation_template.py b/python_app/create_rgb_evaluation_template.py new file mode 100644 index 0000000..dea094c --- /dev/null +++ b/python_app/create_rgb_evaluation_template.py @@ -0,0 +1,193 @@ +""" +Create an Excel evaluation template for RGB harvest date predictions. +Parses field names and dates directly from RGB image filenames. +""" +import os +import glob +import pandas as pd +from openpyxl import Workbook +from openpyxl.styles import Font, PatternFill, Alignment, Border, Side +from openpyxl.utils import get_column_letter +import re +from datetime import datetime + +# Configuration +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +RGB_DIR = os.path.join(BASE_DIR, "laravel_app", "storage", "app", "angata", "RGB") +OUTPUT_PATH = os.path.join(BASE_DIR, "laravel_app", "storage", "app", "angata", "RGB_Evaluation_Template.xlsx") + +# Evaluators +EVALUATORS = ["Joey", "Daniel", "Nik", "Dimitra", "Timon"] + +def parse_rgb_filenames(): + """ + Parse field names and harvest dates from RGB image filenames. 
+ Expected format: field_{field_id or name}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png + """ + fields_data = [] + + # Find all RGB images + rgb_files = glob.glob(os.path.join(RGB_DIR, "field_*.png")) + + for filepath in sorted(rgb_files): + filename = os.path.basename(filepath) + # Pattern: field_{field_id}_{YYYYMMDD}_harvest_xlsx_harvest_rgb.png + match = re.match(r"field_(.+?)_(\d{8})_harvest_xlsx_harvest_rgb\.png", filename) + + if match: + field_id = match.group(1) # e.g., "1000" or "91&92" + date_str = match.group(2) # e.g., "20250814" + + # Format date as YYYY-MM-DD + try: + harvest_date = datetime.strptime(date_str, "%Y%m%d").strftime("%Y-%m-%d") + except ValueError: + harvest_date = date_str + + fields_data.append({ + "field_id": field_id, + "harvest_date": harvest_date, + "filename": filename + }) + + # Sort by field_id (treating numeric parts as integers where possible) + fields_data = sorted(fields_data, key=lambda x: (x["field_id"].replace("&92", ""), )) + + return fields_data + +def create_evaluation_template(): + """Create the Excel evaluation template.""" + print("Loading field data from RGB images...") + fields_data = parse_rgb_filenames() + + if not fields_data: + print("ERROR: No RGB images found in", RGB_DIR) + return + + print(f"Found {len(fields_data)} RGB images") + + # Create workbook + wb = Workbook() + + # === SHEET 1: Evaluation Form === + ws_eval = wb.active + ws_eval.title = "Evaluation" + + # Define styles + header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") + header_font = Font(bold=True, color="FFFFFF", size=11) + border = Border( + left=Side(style='thin'), + right=Side(style='thin'), + top=Side(style='thin'), + bottom=Side(style='thin') + ) + center_align = Alignment(horizontal="center", vertical="center", wrap_text=True) + left_align = Alignment(horizontal="left", vertical="center", wrap_text=True) + + # Column headers + headers = ["Field ID", "Predicted Harvest Date"] + EVALUATORS + for col_idx, 
header in enumerate(headers, start=1): + cell = ws_eval.cell(row=1, column=col_idx, value=header) + cell.fill = header_fill + cell.font = header_font + cell.alignment = center_align + cell.border = border + + # Set column widths + ws_eval.column_dimensions['A'].width = 15 + ws_eval.column_dimensions['B'].width = 20 + for col_idx in range(3, 3 + len(EVALUATORS)): + ws_eval.column_dimensions[get_column_letter(col_idx)].width = 12 + + # Add data rows + for row_idx, field in enumerate(fields_data, start=2): + ws_eval.cell(row=row_idx, column=1, value=field["field_id"]) + ws_eval.cell(row=row_idx, column=2, value=field["harvest_date"]) + + # Add empty cells for evaluator responses + for col_idx in range(3, 3 + len(EVALUATORS)): + cell = ws_eval.cell(row=row_idx, column=col_idx) + cell.alignment = center_align + cell.border = border + + # Light alternating row color + if row_idx % 2 == 0: + for col_idx in range(1, 3 + len(EVALUATORS)): + ws_eval.cell(row=row_idx, column=col_idx).fill = PatternFill( + start_color="D9E8F5", end_color="D9E8F5", fill_type="solid" + ) + + # Apply borders to all data cells + for col_idx in range(1, 3 + len(EVALUATORS)): + ws_eval.cell(row=row_idx, column=col_idx).border = border + if col_idx == 1 or col_idx == 2: + ws_eval.cell(row=row_idx, column=col_idx).alignment = left_align + + # Freeze panes + ws_eval.freeze_panes = "C2" + + # === SHEET 2: Instructions === + ws_instr = wb.create_sheet("Instructions") + + instr_content = [ + ["RGB Evaluation Instructions", ""], + ["", ""], + ["Overview:", ""], + ["The generated RGB images visualize the predicted harvest dates for each field.", ""], + ["The images are 3x3 grids showing satellite imagery from different dates", ""], + ["centered on the predicted harvest date (the center/red-box image).", ""], + ["", ""], + ["What to Evaluate:", ""], + ["For each field, determine if the predicted harvest date is CORRECT:", ""], + ["", ""], + ["Instructions for Reviewing:", ""], + ["1. 
Look at the CENTER image (red box) - this is the predicted harvest date", ""], + ["2. Compare to surrounding dates (before and after)", ""], + ["3. Look for change in field color/status:", ""], + [" • BEFORE: Field appears GREEN (growing/healthy crop)", ""], + [" • AT PREDICTED DATE: Field shows BROWN/YELLOW (soil visible, ripe for harvest)", ""], + [" • AFTER: Field continues BROWN (post-harvest or dormant)", ""], + ["", ""], + ["How to Enter Your Assessment:", ""], + ["Enter one of the following in your evaluator column for each field:", ""], + [" • YES = Predicted date is CORRECT (brown/harvest-ready at center date)", ""], + [" • NO = Predicted date is INCORRECT (not ready or already harvested)", ""], + [" • ? or MAYBE = Uncertain (cloudy images, unclear field status)", ""], + ["", ""], + ["Workflow Options:", ""], + ["Option A (Divide Work): Assign 2-3 fields per evaluator (rows divided by column)", ""], + ["Option B (Full Review): Each evaluator reviews all fields (everyone fills all rows)", ""], + ["Option C (Spot Check): Each evaluator samples 5-10 random fields", ""], + ["", ""], + ["Image Location:", ""], + ["All RGB images are in: /laravel_app/storage/app/angata/RGB/", ""], + ["Format: field_{FIELD_ID}_{YYYY-MM-DD}_harvest_xlsx_harvest_rgb.png", ""], + ["", ""], + ["Notes:", ""], + ["• Cloud cover may obscure ground truth - use best judgment", ""], + ["• Fields with multiple bands or irregular shapes: focus on dominant area", ""], + ["• Use context from previous/next dates to validate your assessment", ""], + ] + + # Add instructions text + for row_idx, row_data in enumerate(instr_content, start=1): + for col_idx, value in enumerate(row_data, start=1): + cell = ws_instr.cell(row=row_idx, column=col_idx, value=value) + if row_idx == 1: + cell.font = Font(bold=True, size=14) + elif any(keyword in str(value) for keyword in ["Overview:", "Instructions", "Workflow", "Image Location", "Notes"]): + cell.font = Font(bold=True, size=11) + cell.alignment = 
Alignment(horizontal="left", vertical="top", wrap_text=True) + + ws_instr.column_dimensions['A'].width = 50 + ws_instr.column_dimensions['B'].width = 80 + + # Save workbook + wb.save(OUTPUT_PATH) + print(f"✓ Evaluation template created: {OUTPUT_PATH}") + print(f"✓ {len(fields_data)} fields added to evaluation form") + print(f"✓ Evaluators: {', '.join(EVALUATORS)}") + +if __name__ == "__main__": + create_evaluation_template() diff --git a/python_app/debug_all_tiles_for_date.py b/python_app/debug_all_tiles_for_date.py deleted file mode 100644 index ba07fe7..0000000 --- a/python_app/debug_all_tiles_for_date.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -""" -Debug script to find all tiles for a date and check which overlap with field boundary -""" - -import json -import rasterio -from rasterio.mask import mask -from pathlib import Path -import numpy as np -import shapely.geometry as shgeom -import pandas as pd - -# Load field 79 boundary -geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson") -field_id = "79" - -print(f"Loading field {field_id} from GeoJSON...") -with open(geojson_path) as f: - geojson_data = json.load(f) - -field_boundary = None -for feature in geojson_data.get('features', []): - props = feature.get('properties', {}) - if str(props.get('field', '')) == str(field_id): - geometry = feature.get('geometry') - if geometry: - geom_type = geometry.get('type', '') - coordinates = geometry.get('coordinates', []) - - if geom_type == 'MultiPolygon': - if coordinates and len(coordinates) > 0: - coords = coordinates[0][0] - field_boundary = shgeom.Polygon(coords) - elif geom_type == 'Polygon': - if coordinates and len(coordinates) > 0: - coords = coordinates[0] - field_boundary = shgeom.Polygon(coords) - break - -if field_boundary is None: - print(f"Field {field_id} not found") - exit(1) - -print(f"Field boundary bounds: {field_boundary.bounds}") -print(f"Field boundary area: {field_boundary.area}") - -# Find a specific date 
directory -tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5") -target_date = pd.Timestamp("2026-01-15") # Use a recent date that exists - -# Find tiles for that date -date_dirs = [] -for date_dir in tiff_dir.iterdir(): - if date_dir.is_dir(): - try: - dir_name = date_dir.name - date_str = dir_name.split('_')[0] - tile_date = pd.Timestamp(date_str) - if tile_date == target_date: - date_dirs.append(date_dir) - except: - pass - -if not date_dirs: - print(f"No tiles found for {target_date}") - exit(1) - -print(f"\nFound {len(date_dirs)} date directory(ies) for {target_date}") - -for date_dir in date_dirs: - print(f"\n=== Checking date directory: {date_dir.name} ===") - - tiles = list(date_dir.glob("*.tif")) - print(f"Found {len(tiles)} tiles in this directory") - - for tile_path in sorted(tiles): - try: - with rasterio.open(tile_path) as src: - tile_bounds = src.bounds - tile_geom = shgeom.box(*tile_bounds) - - intersects = field_boundary.intersects(tile_geom) - intersection = field_boundary.intersection(tile_geom) if intersects else None - intersection_area = intersection.area if intersection else 0 - - print(f"\n{tile_path.name}") - print(f" Tile bounds: {tile_bounds}") - print(f" Intersects field: {intersects}") - if intersects: - print(f" Intersection area: {intersection_area:.8f}") - - # Try to mask this tile - geom = shgeom.mapping(field_boundary) - try: - masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3]) - print(f" ✓ Successfully masked! 
Shape: {masked_data.shape}") - - # Check the data in each band - for i, band_idx in enumerate([1, 2, 3]): - band_data = masked_data[i] - non_zero = (band_data != 0).sum() - print(f" Band {band_idx}: {non_zero} non-zero pixels out of {band_data.size}") - except ValueError as e: - print(f" ✗ Masking failed: {e}") - except Exception as e: - print(f" Error reading tile: {e}") diff --git a/python_app/debug_field_mask.py b/python_app/debug_field_mask.py deleted file mode 100644 index ce96700..0000000 --- a/python_app/debug_field_mask.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python -""" -Debug script to diagnose why field boundary masking produces no data -""" - -import json -import rasterio -from rasterio.mask import mask -from pathlib import Path -import numpy as np -import shapely.geometry as shgeom - -# Load a sample field boundary -geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson") -field_id = "79" # A field that had issues - -print(f"Loading field {field_id} from GeoJSON...") -with open(geojson_path) as f: - geojson_data = json.load(f) - -field_boundary = None -for feature in geojson_data.get('features', []): - props = feature.get('properties', {}) - if str(props.get('field', '')) == str(field_id): - geometry = feature.get('geometry') - if geometry: - geom_type = geometry.get('type', '') - coordinates = geometry.get('coordinates', []) - - if geom_type == 'MultiPolygon': - if coordinates and len(coordinates) > 0: - coords = coordinates[0][0] - field_boundary = shgeom.Polygon(coords) - elif geom_type == 'Polygon': - if coordinates and len(coordinates) > 0: - coords = coordinates[0] - field_boundary = shgeom.Polygon(coords) - break - -if field_boundary is None: - print(f"Field {field_id} not found") - exit(1) - -print(f"Field boundary bounds: {field_boundary.bounds}") -print(f"Field boundary area: {field_boundary.area}") - -# Load a sample TIFF tile -tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5") -tile_file = None 
-for date_dir in sorted(tiff_dir.iterdir()): - if date_dir.is_dir(): - for tif in date_dir.glob("*.tif"): - if tif.stat().st_size > 12e6: - tile_file = tif - break - if tile_file: - break - -if not tile_file: - print("No suitable TIFF found") - exit(1) - -print(f"\nTesting with TIFF: {tile_file.name}") - -with rasterio.open(tile_file) as src: - print(f"TIFF Bounds: {src.bounds}") - print(f"TIFF CRS: {src.crs}") - - # Check if field boundary is within tile bounds - tile_box = shgeom.box(*src.bounds) - intersects = field_boundary.intersects(tile_box) - print(f"Field boundary intersects tile: {intersects}") - - if intersects: - intersection = field_boundary.intersection(tile_box) - print(f"Intersection area: {intersection.area}") - print(f"Intersection bounds: {intersection.bounds}") - - # Try to mask and see what we get - print("\nAttempting to mask...") - geom = shgeom.mapping(field_boundary) - try: - masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3]) - print(f"Masked data shape: {masked_data.shape}") - print(f"Masked data dtype: {masked_data.dtype}") - - # Check the data - for i, band_idx in enumerate([1, 2, 3]): - band_data = masked_data[i] - print(f"\nBand {band_idx}:") - print(f" min: {np.nanmin(band_data):.6f}") - print(f" max: {np.nanmax(band_data):.6f}") - print(f" mean: {np.nanmean(band_data):.6f}") - print(f" % valid (non-zero): {(band_data != 0).sum() / band_data.size * 100:.2f}%") - print(f" % NaN: {np.isnan(band_data).sum() / band_data.size * 100:.2f}%") - - # Show sample values - valid_pixels = band_data[band_data != 0] - if len(valid_pixels) > 0: - print(f" Sample valid values: {valid_pixels[:10]}") - except ValueError as e: - print(f"Error during masking: {e}") diff --git a/python_app/debug_tiff_inspect.py b/python_app/debug_tiff_inspect.py deleted file mode 100644 index be51e5e..0000000 --- a/python_app/debug_tiff_inspect.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python -""" -Debug script to inspect TIFF file structure and data 
-""" - -import rasterio -from pathlib import Path -import numpy as np - -# Pick a tile file to inspect -tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5") - -# Find first available tile -tile_file = None -for date_dir in sorted(tiff_dir.iterdir()): - if date_dir.is_dir(): - for tif in date_dir.glob("*.tif"): - if tif.stat().st_size > 12e6: # Skip empty files - tile_file = tif - break - if tile_file: - break - -if not tile_file: - print("No suitable TIFF files found") - exit(1) - -print(f"Inspecting: {tile_file.name}") -print("=" * 80) - -with rasterio.open(tile_file) as src: - print(f"Band count: {src.count}") - print(f"Data type: {src.dtypes[0]}") - print(f"Shape: {src.height} x {src.width}") - print(f"CRS: {src.crs}") - print(f"Bounds: {src.bounds}") - print() - - # Read each band - for band_idx in range(1, min(6, src.count + 1)): - data = src.read(band_idx) - print(f"Band {band_idx}:") - print(f" dtype: {data.dtype}") - print(f" range: {data.min():.6f} - {data.max():.6f}") - print(f" mean: {data.mean():.6f}") - print(f" % valid (non-zero): {(data != 0).sum() / data.size * 100:.1f}%") - print() diff --git a/python_app/download_planet_missing_dates.py b/python_app/download_planet_missing_dates.py index b6723b7..9e15d25 100644 --- a/python_app/download_planet_missing_dates.py +++ b/python_app/download_planet_missing_dates.py @@ -4,7 +4,7 @@ Purpose: Download Planet satellite data for missing dates only (skip existing fi Can be called from batch scripts or other Python scripts. 
Usage: - python download_planet_missing_dates.py --start 2022-01-01 --end 2025-12-15 --project angata + python download_planet_missing_dates.py --start 2026-01-17 --end 2026-12-20 --project angata python download_planet_missing_dates.py --start 2023-06-01 --end 2023-06-30 --project angata --dry-run Environment variables (alternative to CLI args): diff --git a/python_app/merge_ci_data.R b/python_app/merge_ci_data.R deleted file mode 100644 index 8c179bb..0000000 --- a/python_app/merge_ci_data.R +++ /dev/null @@ -1,29 +0,0 @@ -# Merge all CI RDS files into a single CSV -library(tidyverse) - -# Paths -ci_data_dir <- "r_app/experiments/ci_graph_exploration/CI_data" -output_csv <- "python_app/lstm_ci_data_combined.csv" - -# Find all RDS files -rds_files <- list.files(ci_data_dir, pattern = "\\.rds$", full.names = TRUE) -print(paste("Found", length(rds_files), "RDS files")) - -# Load and combine all files -combined_data <- tibble() - -for (file in rds_files) { - filename <- basename(file) - client_name <- sub("\\.rds$", "", filename) # Extract client name from filename - print(paste("Loading:", filename, "- Client:", client_name)) - data <- readRDS(file) - data$client <- client_name - combined_data <- bind_rows(combined_data, data) -} - -print(paste("Total rows:", nrow(combined_data))) -print(paste("Columns:", paste(names(combined_data), collapse = ", "))) - -# Write to CSV -write.csv(combined_data, output_csv, row.names = FALSE) -print(paste("✓ Saved to:", output_csv)) diff --git a/python_app/rgb_visualization.py b/python_app/rgb_visualization.py index c1fd2e0..ccb83f6 100644 --- a/python_app/rgb_visualization.py +++ b/python_app/rgb_visualization.py @@ -88,95 +88,116 @@ def load_field_boundaries(geojson_path, field_id): return None, None -def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60): +def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=None, debug=False): """ - Find all tile files for 
target_date (or closest date) that overlap with field_boundary. + Find tile files with actual data (not cloud-masked) for target_date or nearest date. - Tile files are organized in subdirectories by date: 5x5/YYYY-MM-DD_HH/*.tif + Searches by increasing distance from target date until finding tiles with data. + Avoids reusing dates in exclude_dates to ensure temporal diversity in grids. Args: target_date (pd.Timestamp): Target date to find tiles near tiff_dir (Path): Directory containing 5x5 date subdirectories field_boundary (shapely.Polygon): Field boundary for overlap detection days_window (int): Max days to search before/after target + exclude_dates (list): List of dates to skip (avoid repetition) + debug (bool): Enable detailed debugging output Returns: tuple: (list of tile paths, actual_date, days_diff) - list: tile paths that overlap field - pd.Timestamp: actual date of tiles found - int: days difference from target to actual date found """ target_date = pd.Timestamp(target_date) tiff_dir = Path(tiff_dir) + exclude_dates = exclude_dates or [] + exclude_dates = [pd.Timestamp(d) for d in exclude_dates] if not tiff_dir.exists(): + if debug: + print(f" [DEBUG] TIFF dir does not exist: {tiff_dir}") return [], None, None - # Find all date subdirectories - available_dates = {} # {date: ([tile file paths], actual_dir_name)} - min_size_mb = 12.0 # Empty files are ~11.56 MB + # Build map of all available dates + available_dates = {} + date_parse_errors = 0 for date_dir in tiff_dir.iterdir(): if not date_dir.is_dir(): continue - try: - # Parse date from directory name (YYYY-MM-DD or YYYY-MM-DD_HH) dir_name = date_dir.name - # Extract just the date part before underscore if it exists date_str = dir_name.split('_')[0] tile_date = pd.Timestamp(date_str) - days_diff = (tile_date - target_date).days - if abs(days_diff) > days_window: - continue - - # Find all .tif files in this date directory tile_files = [] for tile_file in date_dir.glob('*.tif'): - # Skip obviously empty 
files - file_size_mb = tile_file.stat().st_size / (1024 * 1024) - if file_size_mb >= min_size_mb: - tile_files.append(tile_file) + # Include ALL tiles, regardless of size + # Some tiles may be small but still contain valid data for specific fields + tile_files.append(tile_file) if tile_files: available_dates[tile_date] = (tile_files, dir_name) - except: - pass + except Exception as e: + date_parse_errors += 1 + if debug: + print(f" [DEBUG] Failed to parse date from {date_dir.name}: {e}") + + if debug: + print(f" [DEBUG] Found {len(available_dates)} dates with tile files, {date_parse_errors} parse errors") + print(f" [DEBUG] Date range: {min(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'} to {max(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'}") if not available_dates: return [], None, None - # Find closest date - closest_date = min(available_dates.keys(), key=lambda d: abs((d - target_date).days)) - days_diff = (closest_date - target_date).days - tiles, _ = available_dates[closest_date] + # Search dates by increasing distance from target, looking for tiles with actual data + sorted_dates = sorted(available_dates.keys(), key=lambda d: abs((d - target_date).days)) - # Filter tiles to only those that overlap field boundary - if rasterio is None or field_boundary is None: - # If rasterio not available, use all tiles (conservative approach) - return tiles, closest_date, days_diff + for search_date in sorted_dates: + # Skip if this date was recently used (avoid temporal repetition) + if search_date in exclude_dates: + continue + + tiles, dir_name = available_dates[search_date] + days_diff = (search_date - target_date).days + + # Try to find overlapping tiles at this date + overlapping_tiles = [] + tile_check_errors = 0 + + for tile_path in tiles: + try: + with rasterio.open(tile_path) as src: + tile_bounds = src.bounds + tile_geom = shgeom.box(*tile_bounds) + + # Debug first tile + if debug and 
len(overlapping_tiles) == 0 and tile_check_errors == 0: + print(f" [DEBUG] First tile check for {tile_path.name}:") + print(f" Tile bounds: {tile_bounds}") + print(f" Tile CRS: {src.crs}") + print(f" Field bounds: {field_boundary.bounds}") + print(f" Field geom type: {field_boundary.geom_type}") + print(f" Intersects: {tile_geom.intersects(field_boundary)}") + + if tile_geom.intersects(field_boundary): + overlapping_tiles.append(tile_path) + except Exception as e: + tile_check_errors += 1 + if debug: + print(f" [DEBUG] Error checking tile {tile_path.name}: {e}") + + if debug: + print(f" [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, {len(overlapping_tiles)} overlap field, {tile_check_errors} errors") + + if overlapping_tiles: + # Found overlapping tiles, return them + print(f" [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), Tiles: {[Path(t).name for t in overlapping_tiles]}") + return overlapping_tiles, search_date, days_diff - overlapping_tiles = [] + # No overlapping tiles found at all + if debug: + print(f" [DEBUG] No overlapping tiles found for {target_date.strftime('%Y-%m-%d')} within {len(sorted_dates)} searched dates") - for tile_path in tiles: - try: - with rasterio.open(tile_path) as src: - # Get tile bounds - tile_bounds = src.bounds # (left, bottom, right, top) - tile_geom = shgeom.box(*tile_bounds) - - # Check if tile overlaps field - if tile_geom.intersects(field_boundary): - overlapping_tiles.append(tile_path) - except: - pass - - if not overlapping_tiles: - # No overlapping tiles found, return all tiles for the closest date - return tiles, closest_date, days_diff - - return overlapping_tiles, closest_date, days_diff + return [], None, None def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)): @@ -211,44 +232,44 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)): # For merged_final_tif: bands 1,2,3 are R,G,B 
bands_to_read = (1, 2, 3) - # Mask and read bands + # Mask and read bands - extract ONLY the specific field polygon geom = shgeom.mapping(field_boundary) try: masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read)) - - # Stack RGB rgb = np.stack([masked_data[i] for i in range(3)], axis=-1) - - # Convert to float32 if not already - rgb = rgb.astype(np.float32) - - # Normalize to 0-1 range - # Data appears to be 8-bit (0-255 range) stored as float32 - # Check actual max value to determine normalization - max_val = np.nanmax(rgb) - if max_val > 0: - # If max is around 255 or less, assume 8-bit - if max_val <= 255: - rgb = rgb / 255.0 - # If max is around 65535, assume 16-bit - elif max_val <= 65535: - rgb = rgb / 65535.0 - # Otherwise divide by max to normalize - else: - rgb = rgb / max_val - - rgb = np.clip(rgb, 0, 1) - - # Check if result is all NaN - if np.all(np.isnan(rgb)): - return None - - # Replace any remaining NaN with 0 (cloud/invalid pixels) - rgb = np.nan_to_num(rgb, nan=0.0) - - return rgb - except ValueError: + except (ValueError, RuntimeError) as e: + # Mask failed - field doesn't overlap this tile or geometry issue + print(f" MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}") return None + + # Convert to float32 if not already + rgb = rgb.astype(np.float32) + + # Normalize to 0-1 range + # Data appears to be 8-bit (0-255 range) stored as float32 + # Check actual max value to determine normalization + max_val = np.nanmax(rgb) + if max_val > 0: + # If max is around 255 or less, assume 8-bit + if max_val <= 255: + rgb = rgb / 255.0 + # If max is around 65535, assume 16-bit + elif max_val <= 65535: + rgb = rgb / 65535.0 + # Otherwise divide by max to normalize + else: + rgb = rgb / max_val + + rgb = np.clip(rgb, 0, 1) + + # Check if result is all NaN + if np.all(np.isnan(rgb)): + return None + + # Replace any remaining NaN with 0 (cloud/invalid pixels) + rgb = np.nan_to_num(rgb, nan=0.0) + + return rgb except Exception as e: return 
None @@ -284,9 +305,16 @@ def load_and_composite_tiles_rgb(tile_paths, field_boundary): if len(rgb_arrays) == 1: composited = rgb_arrays[0] else: - # If multiple tiles, use max composite - stacked = np.stack(rgb_arrays, axis=0) - composited = np.max(stacked, axis=0) + # If multiple tiles, need to handle different shapes + # Find common shape or use max/min approach that handles variable sizes + try: + # Try to stack if same shape + stacked = np.stack(rgb_arrays, axis=0) + composited = np.max(stacked, axis=0) + except ValueError: + # Different shapes - use the largest (most complete) tile + # This happens when tiles are masked to different field areas + composited = max(rgb_arrays, key=lambda x: x.size) composited = composited.astype(np.float32) @@ -307,6 +335,26 @@ def load_and_composite_tiles_rgb(tile_paths, field_boundary): return None +def has_valid_rgb_data(rgb_data, threshold=0.05): + """ + Check if RGB image has actual data (not black/empty). + Returns True if max value > threshold (not all zeros/black). 
+ """ + if rgb_data is None: + return False + + try: + # Check if there's any variation in the RGB data + data_max = np.nanmax(rgb_data) + data_min = np.nanmin(rgb_data) + + # Image is valid if max > threshold AND there's variation + has_data = data_max > threshold and (data_max - data_min) > 0.01 + return has_data + except: + return False + + def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary, title, output_dir, harvest_type='registered', model_name=None, harvest_index=None): """ @@ -334,91 +382,263 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field """ harvest_date = pd.Timestamp(harvest_date) - # Target dates: 15 images at 7-day intervals (8 pre, 1 near, 6 post) - target_dates = [ - harvest_date - timedelta(days=56), # T-56d - harvest_date - timedelta(days=49), # T-49d - harvest_date - timedelta(days=42), # T-42d - harvest_date - timedelta(days=35), # T-35d - harvest_date - timedelta(days=28), # T-28d - harvest_date - timedelta(days=21), # T-21d - harvest_date - timedelta(days=14), # T-14d - harvest_date - timedelta(days=7), # T-7d - harvest_date, # T~0d (near harvest) - harvest_date + timedelta(days=7), # T+7d - harvest_date + timedelta(days=14), # T+14d - harvest_date + timedelta(days=21), # T+21d - harvest_date + timedelta(days=28), # T+28d - harvest_date + timedelta(days=35), # T+35d - harvest_date + timedelta(days=42), # T+42d - harvest_date + timedelta(days=56), # T+56d (Note: non-standard to fill 5th col in row 3) - ] + # Pre-allocate 15 image slots + rgb_images = [None] * 15 + days_offsets = [None] * 15 + actual_dates = [None] * 15 + used_dates = set() # Use set for efficient lookups - # Find TIFFs for each date - rgb_images = [] - days_offsets = [] - actual_dates = [] # Store actual dates of TIFFs found + # STEP 0: Debug - List all available dates + print(f" [STEP 0] Checking available TIFF dates in {tiff_dir}...") + available_dates = [] + if tiff_dir.exists(): + for date_folder in 
sorted(tiff_dir.iterdir()): + if date_folder.is_dir(): + try: + date_obj = datetime.strptime(date_folder.name, '%Y-%m-%d').date() + available_dates.append(date_obj) + except: + pass + print(f" Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)") - for target in target_dates: - tile_paths, actual_date, days_diff = find_overlapping_tiles(target, tiff_dir, field_boundary, days_window=60) - - if not tile_paths or actual_date is None: - rgb_images.append(None) - days_offsets.append(None) - actual_dates.append(None) - print(f" ⚠ No tiles found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data") - continue - - rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary) - rgb_images.append(rgb) - days_offsets.append(days_diff) - actual_dates.append(actual_date) - - if rgb is not None: - print(f" ✓ Loaded {len(tile_paths)} tile(s) for {actual_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from target)") + # STEP 1: Find anchor image (closest to predicted harvest date) FIRST + # Search within ±14 days of predicted harvest date first, then expand if needed + print(f" [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...") + anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles( + harvest_date, tiff_dir, field_boundary, days_window=14, exclude_dates=[], debug=False + ) + + anchor_rgb = None + anchor_idx = 8 # Position 8 is the center (T~0d / harvest date position) + failed_anchor_dates = [] # Track dates that failed validation + + if anchor_tile_paths and anchor_date: + anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary) + if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb): + rgb_images[anchor_idx] = anchor_rgb + days_offsets[anchor_idx] = 0 # Anchor is reference point + actual_dates[anchor_idx] = anchor_date + used_dates.add(anchor_date) + print(f" ✓ ANCHOR FOUND (±14d): {anchor_date.strftime('%Y-%m-%d')} 
({anchor_days_diff:+d}d from predicted harvest)") else: - print(f" ⚠ Loaded {len(tile_paths)} tile(s) but RGB data is None") + failed_anchor_dates.append(anchor_date) + print(f" ⚠ Found date {anchor_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data") + print(f" [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...") + anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles( + harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=set(failed_anchor_dates), debug=False + ) + if anchor_tile_paths and anchor_date: + anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary) + if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb): + rgb_images[anchor_idx] = anchor_rgb + days_offsets[anchor_idx] = 0 # Anchor is reference point + actual_dates[anchor_idx] = anchor_date + used_dates.add(anchor_date) + print(f" ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)") + else: + failed_anchor_dates.append(anchor_date) + print(f" ✗ No valid anchor found even within ±60 days") + else: + print(f" ✗ No tiles found for any date within ±60 days") + else: + print(f" ⚠ No tiles found within ±14 days, expanding search...") + anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles( + harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=[], debug=False + ) + if anchor_tile_paths and anchor_date: + anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary) + if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb): + rgb_images[anchor_idx] = anchor_rgb + days_offsets[anchor_idx] = 0 # Anchor is reference point + actual_dates[anchor_idx] = anchor_date + used_dates.add(anchor_date) + print(f" ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)") + else: + print(f" ✗ No valid anchor found even within ±60 days") + else: + print(f" ✗ No tiles 
found for any date within ±60 days") + + # STEP 2: Dynamically collect images BEFORE anchor date + # Strategy: Go backwards from anchor with progressively larger search windows + # Start at 7 days, then try 10, 15, 20, 30+ days apart + print(f" [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...") + before_positions = [7, 6, 5, 4, 3, 2, 1, 0] # Will fill in reverse order (7→0) + before_images = [] # (position, date, rgb, offset) + + pos_idx = 0 # Index into before_positions + last_found_date = anchor_date + + # Progressive search offsets: try these day offsets in order + search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120] # Days before last found image + + while pos_idx < len(before_positions) and last_found_date.year >= 2024: + found_this_iteration = False + + # Try each offset until we find a valid image + for days_offset in search_offsets: + search_target_date = last_found_date - timedelta(days=days_offset) + + tile_paths, actual_date, days_diff = find_overlapping_tiles( + search_target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=used_dates, debug=False + ) + + if tile_paths and actual_date: + rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary) + if rgb is not None and has_valid_rgb_data(rgb): + # Found valid image! 
+ overall_max = np.nanmax(rgb) + overall_min = np.nanmin(rgb) + + offset_from_anchor = (actual_date - anchor_date).days + before_images.append((before_positions[pos_idx], actual_date, rgb, offset_from_anchor)) + used_dates.add(actual_date) + last_found_date = actual_date # Move backwards from this date + + print(f" ✓ Before[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}") + + pos_idx += 1 + found_this_iteration = True + break # Found one, stop trying larger offsets + + # If nothing found with any offset, we're done collecting before images + if not found_this_iteration: + break + + # Store collected before images + for pos, actual_date, rgb, offset in before_images: + rgb_images[pos] = rgb + actual_dates[pos] = actual_date + days_offsets[pos] = offset + + # STEP 3: Dynamically collect images AFTER anchor date + # Strategy: Go forwards from anchor with progressively larger search windows + # Start at 7 days, then try 10, 15, 20, 30+ days apart + print(f" [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...") + after_positions = [9, 10, 11, 12, 13, 14] # Will fill in order (9→14) + after_images = [] # (position, date, rgb, offset) + + pos_idx = 0 # Index into after_positions + last_found_date = anchor_date + max_search_date = anchor_date + timedelta(days=200) # Don't search beyond 200 days forward + + # Progressive search offsets: try these day offsets in order + search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120] # Days after last found image + + while pos_idx < len(after_positions) and last_found_date < max_search_date: + found_this_iteration = False + + # Try each offset until we find a valid image + for days_offset in search_offsets: + search_target_date = last_found_date + timedelta(days=days_offset) + + # Don't search beyond max date + if search_target_date > max_search_date: + break + + tile_paths, actual_date, days_diff = find_overlapping_tiles( + 
search_target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=used_dates, debug=False + ) + + if tile_paths and actual_date: + rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary) + if rgb is not None and has_valid_rgb_data(rgb): + # Found valid image! + overall_max = np.nanmax(rgb) + overall_min = np.nanmin(rgb) + + offset_from_anchor = (actual_date - anchor_date).days + after_images.append((after_positions[pos_idx], actual_date, rgb, offset_from_anchor)) + used_dates.add(actual_date) + last_found_date = actual_date # Move forwards from this date + + print(f" ✓ After[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}") + + pos_idx += 1 + found_this_iteration = True + break # Found one, stop trying larger offsets + + # If nothing found with any offset, we're done collecting after images + if not found_this_iteration: + break + + # Store collected after images + for pos, actual_date, rgb, offset in after_images: + rgb_images[pos] = rgb + actual_dates[pos] = actual_date + days_offsets[pos] = offset # Create 5x3 grid plot (15 images) fig, axes = plt.subplots(3, 5, figsize=(25, 15)) - fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")}', + + # Build title with anchor offset information + anchor_offset_from_harvest = (actual_dates[8] - harvest_date).days if actual_dates[8] is not None else None + if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0: + anchor_info = f"(Anchor: {actual_dates[8].strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)" + else: + anchor_info = f"(Exact match with anchor: {actual_dates[8].strftime('%Y-%m-%d')})" if actual_dates[8] is not None else "" + + fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}', fontsize=16, fontweight='bold') # Grid positions (5 
columns, 3 rows = 15 images) positions = [ ('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4), - ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('T~0d', 1, 3), ('T+7d', 1, 4), + ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4), ('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4), ] - for idx, (label, row, col) in enumerate(positions): # All 15 images + for idx, (label, row, col) in enumerate(positions): ax = axes[row, col] if idx < len(rgb_images) and rgb_images[idx] is not None: rgb_data = rgb_images[idx] - # Debug: check data range - data_min, data_max = np.nanmin(rgb_data), np.nanmax(rgb_data) - print(f" DEBUG: {label} RGB range: {data_min:.4f} - {data_max:.4f}, shape: {rgb_data.shape}") + # Debug: check data range for ALL bands + data_min = np.nanmin(rgb_data) + data_max = np.nanmax(rgb_data) + data_mean = np.nanmean(rgb_data) + data_std = np.nanstd(rgb_data) + + # Check per-band stats + r_min, r_max, r_mean = np.nanmin(rgb_data[:,:,0]), np.nanmax(rgb_data[:,:,0]), np.nanmean(rgb_data[:,:,0]) + g_min, g_max, g_mean = np.nanmin(rgb_data[:,:,1]), np.nanmax(rgb_data[:,:,1]), np.nanmean(rgb_data[:,:,1]) + b_min, b_max, b_mean = np.nanmin(rgb_data[:,:,2]), np.nanmax(rgb_data[:,:,2]), np.nanmean(rgb_data[:,:,2]) + + print(f" DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})") + print(f" R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})") # Display with explicit vmin/vmax to handle normalized 0-1 data ax.imshow(rgb_data, vmin=0, vmax=1) - # Build title: label + offset + actual date - offset_str = f"{days_offsets[idx]:+d}d" if days_offsets[idx] is not None else "?" 
- date_str = actual_dates[idx].strftime('%Y-%m-%d') if actual_dates[idx] is not None else "No Date" - ax.set_title(f'{label}\n{offset_str}\n{date_str}', fontsize=10, fontweight='bold') + # Build title: show BOTH anchor offset AND harvest offset + if days_offsets[idx] is not None: + offset_from_anchor = days_offsets[idx] + offset_from_harvest = (actual_dates[idx] - harvest_date).days + + if idx == 8: # ANCHOR/HARVEST position + if offset_from_harvest == 0: + offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})" + else: + offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)" + else: + # Show both offsets: from anchor and from harvest + offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}" + else: + offset_str = "No Data" - # Add red box around harvest date (T~0d at row=1, col=3) - if label == 'T~0d': + ax.set_title(offset_str, fontsize=9, fontweight='bold') + + # Add red box around the ANCHOR IMAGE (position 8 is harvest/anchor) + if idx == 8: # Position 8 is the anchor for spine in ax.spines.values(): spine.set_edgecolor('red') spine.set_linewidth(4) else: ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray') - ax.set_title(label, fontsize=10) + ax.set_title('No Data', fontsize=10) + print(f" DEBUG EMPTY {label}: No image data collected") - # Add red box for T~0d even if no data - if label == 'T~0d': + # Add red box for anchor position even if no data + if idx == 8: # Position 8 is the anchor for spine in ax.spines.values(): spine.set_edgecolor('red') spine.set_linewidth(4) diff --git a/r_app/10_create_master_grid_and_split_tiffs.R b/r_app/10_create_master_grid_and_split_tiffs.R index 6279fb2..fdab65c 100644 --- a/r_app/10_create_master_grid_and_split_tiffs.R +++ b/r_app/10_create_master_grid_and_split_tiffs.R @@ -6,6 +6,8 @@ #' 2. 
Create master 5×5 grid covering all TIFFs #' 3. Split each daily TIFF into 25 tiles using the master grid #' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif +#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18 + library(terra) library(sf) @@ -32,7 +34,7 @@ if (length(args) >= 2) { } PROJECT <- "angata" -TIFF_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "merged_tif_8b") +TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b") # GRID SIZE CONFIGURATION - Change this to use different grid sizes # Options: 5x5 (25 tiles), 10x10 (100 tiles), etc. @@ -42,10 +44,10 @@ GRID_NCOLS <- 5 # Construct grid-specific subfolder path GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS) -OUTPUT_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL) +OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL) # Load field boundaries for overlap checking -GEOJSON_PATH <- file.path("..", "laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson") +GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson") cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "") cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") diff --git a/r_app/20_ci_extraction.R b/r_app/20_ci_extraction.R index d9c3d37..ab82188 100644 --- a/r_app/20_ci_extraction.R +++ b/r_app/20_ci_extraction.R @@ -13,13 +13,13 @@ # # Examples: # # Angata 8-band data (with UDM cloud masking) -# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/02_ci_extraction.R 2026-01-02 7 angata merged_tif_8b +# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif_8b # # # Aura 4-band data -# Rscript 02_ci_extraction.R 2025-11-26 7 aura 
merged_tif +# Rscript 20_ci_extraction.R 2025-11-26 7 aura merged_tif # # # Auto-detects and uses tiles if available: -# Rscript 02_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists) +# Rscript 20_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists) # 1. Load required packages # ----------------------- diff --git a/r_app/30_interpolate_growth_model.R b/r_app/30_interpolate_growth_model.R index 6707190..ed310e5 100644 --- a/r_app/30_interpolate_growth_model.R +++ b/r_app/30_interpolate_growth_model.R @@ -8,7 +8,7 @@ # # Usage: Rscript interpolate_growth_model.R [project_dir] # - project_dir: Project directory name (e.g., "chemba") -# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/03_interpolate_growth_model.R angata +# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata # 1. Load required packages # ----------------------- diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index bef0764..ed2c330 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -186,8 +186,15 @@ main <- function() { end_date <- if (length(args) >= 1 && !is.na(args[1])) { as.Date(args[1]) } else if (exists("end_date", envir = .GlobalEnv)) { - # For recursive calls, use the end_date that was set in the global environment - get("end_date", envir = .GlobalEnv) + global_date <- get("end_date", envir = .GlobalEnv) + # Check if it's a valid Date with length > 0 + if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) { + global_date + } else if (exists("end_date_str", envir = .GlobalEnv)) { + as.Date(get("end_date_str", envir = .GlobalEnv)) + } else { + Sys.Date() + } } else if (exists("end_date_str", envir = .GlobalEnv)) { as.Date(get("end_date_str", envir = .GlobalEnv)) } else { @@ -210,10 +217,15 @@ main <- function() { 7 } + # Validate end_date is a proper Date object + if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) 
{ + stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date)) + } + assign("project_dir", project_dir, envir = .GlobalEnv) assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv) - message("\n" %+% strrep("=", 70)) + message("\n", strrep("=", 70)) message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION") message(strrep("=", 70)) message("Date:", format(end_date, "%Y-%m-%d")) @@ -238,7 +250,7 @@ main <- function() { # ========== PER-FIELD ANALYSIS (SC-64) ========== - message("\n" %+% strrep("-", 70)) + message("\n", strrep("-", 70)) message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") message(strrep("-", 70)) @@ -694,9 +706,9 @@ main <- function() { # ========== FINAL SUMMARY ========== - cat("\n" %+% strrep("=", 70) %+% "\n") + cat("\n", strrep("=", 70), "\n") cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n") - cat(strrep("=", 70) %+% "\n") + cat(strrep("=", 70), "\n") cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n") cat("Excel export:", export_paths$excel, "\n") cat("RDS export:", export_paths$rds, "\n") diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index 657fec1..ae6ff14 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -1,31 +1,40 @@ # ============================================================================== # FULL PIPELINE RUNNER # ============================================================================== -# Runs scripts 02, 03, 04, 09 (KPIs), 09 (Weekly), and 10 (CI Report Simple) +# Mixed Python/R pipeline: +# 1. Python: Download Planet images +# 2. R 10: Create master grid and split TIFFs +# 3. R 20: CI Extraction +# 4. R 21: Convert CI RDS to CSV +# 5. R 30: Interpolate growth model +# 6. Python 31: Harvest imminent weekly +# 7. R 40: Mosaic creation +# 8. 
R 80: Calculate KPIs # # ============================================================================== # HOW TO RUN THIS SCRIPT # ============================================================================== # -# In PowerShell or Command Prompt: +# Run from the smartcane/ directory: # # Option 1 (Recommended - shows real-time output): -# Rscript run_full_pipeline.R +# Rscript r_app/run_full_pipeline.R # # Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces): -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" run_full_pipeline.R +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R # # Option 3 (Batch mode - output saved to .Rout file): -# R CMD BATCH --vanilla run_full_pipeline.R +# R CMD BATCH --vanilla r_app/run_full_pipeline.R # # ============================================================================== # ============================================================================== # *** EDIT THESE VARIABLES *** -end_date <- "2025-12-24" # or specify: "2025-12-02", Sys.Date() +end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date() offset <- 7 # days to look back project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba" data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" +force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist # *************************** # Format dates @@ -35,117 +44,350 @@ end_date_str <- format(as.Date(end_date), "%Y-%m-%d") pipeline_success <- TRUE # ============================================================================== -# SCRIPT 02: CI EXTRACTION +# INTELLIGENT CHECKING: What has already been completed? 
# ============================================================================== -cat("\n========== RUNNING SCRIPT 02: CI EXTRACTION ==========\n") -tryCatch({ - source("r_app/02_ci_extraction.R") - main() # Call the main function - cat("✓ Script 02 completed\n") -}, error = function(e) { - cat("✗ Error in Script 02:", e$message, "\n") - pipeline_success <<- FALSE -}) +cat("\n========== CHECKING EXISTING OUTPUTS ==========\n") -# ============================================================================== -# SCRIPT 03: INTERPOLATE GROWTH MODEL -# ============================================================================== -cat("\n========== RUNNING SCRIPT 03: INTERPOLATE GROWTH MODEL ==========\n") -tryCatch({ - source("r_app/03_interpolate_growth_model.R") - main() # Call the main function - cat("✓ Script 03 completed\n") -}, error = function(e) { - cat("✗ Error in Script 03:", e$message, "\n") - pipeline_success <<- FALSE -}) - -# ============================================================================== -# SCRIPT 04: MOSAIC CREATION -# ============================================================================== -cat("\n========== RUNNING SCRIPT 04: MOSAIC CREATION ==========\n") -tryCatch({ - source("r_app/04_mosaic_creation.R") - main() # Call the main function - cat("✓ Script 04 completed\n") -}, error = function(e) { - cat("✗ Error in Script 04:", e$message, "\n") - pipeline_success <<- FALSE -}) - -# ============================================================================== -# SCRIPT 09: CALCULATE KPIs -# ============================================================================== -cat("\n========== RUNNING SCRIPT 09: CALCULATE KPIs ==========\n") -tryCatch({ - source("r_app/09_calculate_kpis.R") - main() # Call the main function - cat("✓ Script 09 (KPIs) completed\n") -}, error = function(e) { - cat("✗ Error in Script 09 (KPIs):", e$message, "\n") - pipeline_success <<- FALSE -}) - -# 
============================================================================== -# SCRIPT 09: FIELD ANALYSIS WEEKLY -# ============================================================================== -# Only run field analysis weekly for angata project -if (project_dir == "angata") { - cat("\n========== RUNNING SCRIPT 09: FIELD ANALYSIS WEEKLY ==========\n") - tryCatch({ - source("r_app/09_field_analysis_weekly.R") - main() # Call the main function - cat("✓ Script 09 (Weekly) completed\n") - }, error = function(e) { - cat("✗ Error in Script 09 (Weekly):", e$message, "\n") - pipeline_success <<- FALSE - }) +# Check Script 10 outputs (tiled splits) +tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") +tiles_dates <- if (dir.exists(tiles_dir)) { + list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) } else { - cat("\n========== SKIPPING SCRIPT 09: FIELD ANALYSIS WEEKLY (only runs for angata) ==========\n") + c() } +cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates))) + +# Check Script 20 outputs (CI extraction) - daily RDS files +ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") +ci_files <- if (dir.exists(ci_daily_dir)) { + list.files(ci_daily_dir, pattern = "\\.rds$") +} else { + c() +} +cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files))) + +# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this +# Instead, check if CI RDS files exist - if they do, 21 should also run +# For now, just note that CSV is time-dependent, not a good skip indicator +cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n") + +# Check Script 40 outputs (mosaics in weekly_tile_max/5x5) +mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") +mosaic_files <- if (dir.exists(mosaic_dir)) { + list.files(mosaic_dir, 
pattern = "\\.tif$") +} else { + c() +} +cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files))) + +# Check Script 80 outputs (KPIs in reports/kpis/field_stats) +kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") +kpi_files <- if (dir.exists(kpi_dir)) { + list.files(kpi_dir, pattern = "\\.csv$|\\.json$") +} else { + c() +} +cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files))) + +# Determine if scripts should run based on outputs +skip_10 <- length(tiles_dates) > 0 && !force_rerun +skip_20 <- length(ci_files) > 0 && !force_rerun +skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped +skip_40 <- length(mosaic_files) > 0 && !force_rerun +skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks + +cat("\nSkipping decisions:\n") +cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP (tiles exist)" else "RUN")) +cat(sprintf(" Script 20: %s\n", if(skip_20) "SKIP (CI exists)" else "RUN")) +cat(sprintf(" Script 21: %s\n", if(skip_21) "SKIP (CI exists)" else "RUN")) +cat(sprintf(" Script 40: %s\n", if(skip_40) "SKIP (mosaics exist)" else "RUN")) +cat(sprintf(" Script 80: %s\n", if(skip_80) "SKIP (KPIs exist)" else "RUN")) # ============================================================================== -# SCRIPT 91: CI REPORT ANGATA (only for angata) +# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY) # ============================================================================== -if (project_dir == "angata") { - cat("\n========== RUNNING SCRIPT 91: CI REPORT ANGATA ==========\n") - if (pipeline_success) { - tryCatch({ - rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd", - output_format = "word_document", - params = list(data_dir = project_dir, report_date = end_date_str)) - cat("✓ Script 91 (Report) completed\n") - }, error = function(e) { - cat("✗ Error in Script 91 (Report):", e$message, "\n") - }) - } 
else { - cat("✗ Skipping Script 91: Previous pipeline scripts failed\n") +cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n") +tryCatch({ + # Setup paths + base_path <- file.path("laravel_app", "storage", "app", project_dir) + merged_tifs_dir <- file.path(base_path, data_source) + + # Get existing dates from raw TIFFs + existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) + + # Get existing dates from tiles (better indicator of completion) + existing_tile_dates <- tiles_dates + + # Find missing dates in the window + start_date <- end_date - offset + date_seq <- seq(start_date, end_date, by = "day") + target_dates <- format(date_seq, "%Y-%m-%d") + + # Only download if tiles don't exist yet (more reliable than checking raw TIFFs) + missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)] + + cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) + cat(sprintf(" Missing dates in window: %d\n", length(missing_dates))) + + # Download each missing date + download_count <- 0 + download_failed <- 0 + + if (length(missing_dates) > 0) { + # Save current directory + original_dir <- getwd() + + # Change to python_app directory so relative paths work correctly + setwd("python_app") + + for (date_str in missing_dates) { + cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str) + result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE) + if (result == 0) { + download_count <- download_count + 1 + } else { + download_failed <- download_failed + 1 + } } + + # Change back to original directory + setwd(original_dir) + } + + cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed)) + if (download_failed > 0) { + cat("⚠ Some downloads failed, but continuing pipeline\n") + } + + # Force Script 10 to run ONLY if downloads 
actually succeeded (not just attempted) + if (download_count > 0) { + skip_10 <- FALSE + } + +}, error = function(e) { + cat("✗ Error in planet download:", e$message, "\n") + pipeline_success <<- FALSE +}) + +# ============================================================================== +# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs +# ============================================================================== +if (pipeline_success && !skip_10) { + cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n") + tryCatch({ + # Set environment variables for the script (Script 10 uses these for filtering) + assign("PROJECT", project_dir, envir = .GlobalEnv) + + # Suppress verbose per-date output, show only summary + sink(nullfile()) + source("r_app/10_create_master_grid_and_split_tiffs.R") + sink() + + # Verify output + tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") + if (dir.exists(tiles_dir)) { + subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) + cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs))) + } else { + cat("✓ Script 10 completed\n") + } + }, error = function(e) { + sink() + cat("✗ Error in Script 10:", e$message, "\n") + pipeline_success <<- FALSE + }) +} else if (skip_10) { + cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n") } # ============================================================================== -# SCRIPT 10: CI REPORT (SIMPLE) +# SCRIPT 20: CI EXTRACTION # ============================================================================== -# Only run CI report for non-angata projects +if (pipeline_success && !skip_20) { + cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n") + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = 
.GlobalEnv) + assign("data_source", data_source, envir = .GlobalEnv) + + source("r_app/20_ci_extraction.R") + main() # Call main() to execute the script with the environment variables + + # Verify CI output was created + ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") + if (dir.exists(ci_daily_dir)) { + files <- list.files(ci_daily_dir, pattern = "\\.rds$") + cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files))) + } else { + cat("✓ Script 20 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 20:", e$message, "\n") + pipeline_success <<- FALSE + }) +} else if (skip_20) { + cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) ==========\n") +} +# ============================================================================== +# SCRIPT 21: CONVERT CI RDS TO CSV +# ============================================================================== +if (pipeline_success && !skip_21) { + cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n") + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/21_convert_ci_rds_to_csv.R") + main() # Call main() to execute the script with the environment variables + + # Verify CSV output was created + ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir, "ci_extracted") + if (dir.exists(ci_csv_path)) { + csv_files <- list.files(ci_csv_path, pattern = "\\.csv$") + cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files))) + } else { + cat("✓ Script 21 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 21:", e$message, "\n") + pipeline_success <<- FALSE + }) +} else if (skip_21) { + cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n") +} -if (project_dir != 
"angata") { - cat("\n========== RUNNING SCRIPT 10: CI REPORT SIMPLE ==========\n") - if (pipeline_success) { - tryCatch({ - rmarkdown::render("r_app/10_CI_report_with_kpis_simple.Rmd", - output_format = "word_document", - params = list(data_dir = project_dir, report_date = end_date_str)) - cat("✓ Script 10 (Report) completed\n") - }, error = function(e) { - cat("✗ Error in Script 10 (Report):", e$message, "\n") - }) - } else { - cat("✗ Skipping Script 10: Previous pipeline scripts failed\n") - } - } else { - cat("\n========== SKIPPING SCRIPT 10: CI REPORT SIMPLE (not applicable for angata) ==========\n") - } +# ============================================================================== +# SCRIPT 30: INTERPOLATE GROWTH MODEL +# ============================================================================== +if (pipeline_success) { + cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + assign("data_source", data_source, envir = .GlobalEnv) + + source("r_app/30_interpolate_growth_model.R") + main() # Call main() to execute the script with the environment variables + + # Verify interpolated output + growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") + if (dir.exists(growth_dir)) { + files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$") + cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files))) + } else { + cat("✓ Script 30 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 30:", e$message, "\n") + pipeline_success <<- FALSE + }) +} + +# ============================================================================== +# PYTHON 31: HARVEST IMMINENT WEEKLY +# 
============================================================================== +if (pipeline_success) { + cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n") + tryCatch({ + # Run Python script in pytorch_gpu conda environment + # Script expects positional project name (not --project flag) + # Run from smartcane root so conda can find the environment + cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir) + cat("DEBUG: Running command:", cmd, "\n") + result <- system(cmd) + + if (result == 0) { + # Verify harvest output - check for THIS WEEK's specific file + current_week <- as.numeric(format(end_date, "%V")) + current_year <- as.numeric(format(end_date, "%Y")) + expected_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) + + if (file.exists(expected_file)) { + cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", current_week)) + } else { + cat("✓ Script 31 completed (check if harvest.xlsx is available)\n") + } + } else { + cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n") + } + }, error = function(e) { + # No setwd() restore needed: this block never changes directory, and original_dir may be unbound here + cat("⚠ Script 31 error:", e$message, "\n") + }) +} + +# ============================================================================== +# SCRIPT 40: MOSAIC CREATION +# ============================================================================== +if (pipeline_success && !skip_40) { + cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n") + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + assign("data_source", data_source, envir = .GlobalEnv) + + source("r_app/40_mosaic_creation.R") + main() # Call 
main() to execute the script with the environment variables + + # Verify mosaic output + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") + if (dir.exists(mosaic_dir)) { + files <- list.files(mosaic_dir, pattern = "\\.tif$") + cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", length(files))) + } else { + cat("✓ Script 40 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 40:", e$message, "\n") + pipeline_success <<- FALSE + }) +} else if (skip_40) { + cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n") +} + +# ============================================================================== +# SCRIPT 80: CALCULATE KPIs +# ============================================================================== +if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week + cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n") + tryCatch({ + # Set environment variables for the script (Script 80's main() uses these as fallbacks) + # NOTE: end_date is already a Date, just assign directly without as.Date() + assign("end_date", end_date, envir = .GlobalEnv) + assign("end_date_str", end_date_str, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + assign("data_source", data_source, envir = .GlobalEnv) + + source("r_app/80_calculate_kpis.R") + main() # Call main() to execute the script with the environment variables + + # Verify KPI output + kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") + if (dir.exists(kpi_dir)) { + files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") + cat(sprintf("✓ Script 80 completed - generated %d KPI files\n", length(files))) + } else { + cat("✓ Script 80 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 80:", e$message, "\n") + cat("Full error:\n") + print(e) + 
pipeline_success <<- FALSE + }) +} # ============================================================================== # SUMMARY @@ -154,4 +396,9 @@ cat("\n========== PIPELINE COMPLETE ==========\n") cat(sprintf("Project: %s\n", project_dir)) cat(sprintf("End Date: %s\n", end_date_str)) cat(sprintf("Offset: %d days\n", offset)) -cat("Scripts executed: 02, 03, 04, 09 (KPIs), 09 (Weekly), 10 (CI Report)\n") +if (pipeline_success) { + cat("Status: ✓ All scripts completed successfully\n") +} else { + cat("Status: ✗ Pipeline failed - check errors above\n") +} +cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")