working on visualisation, not working too well...
This commit is contained in:
parent
9f312131d7
commit
c11b10a73f
|
|
@ -7,18 +7,19 @@ is approaching harvest. Use this for operational decision-making and real-time a
|
|||
|
||||
RUN FREQUENCY: Weekly (or daily if required)
|
||||
INPUT:
|
||||
- ci_data_for_python.csv (recent CI data from 02b_convert_rds_to_csv.R)
|
||||
- harvest.xlsx (baseline from scripts 22+23 - contains last harvest date per field)
|
||||
Location: laravel_app/storage/app/{project}/Data/harvest.xlsx
|
||||
- ci_data_for_python.csv (complete CI data from R script)
|
||||
Location: laravel_app/storage/app/{project}/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv
|
||||
- harvest_production_export.xlsx (baseline from script 01 - optional, for reference)
|
||||
OUTPUT:
|
||||
- harvest_imminent_weekly.csv (weekly probabilities: field, imminent_prob, detected_prob, week, year)
|
||||
- reports/kpis/field_stats/{project}_harvest_imminent_week_{WW}_{YYYY}.csv (weekly probabilities: field, imminent_prob, detected_prob, week, year)
|
||||
|
||||
Workflow:
|
||||
1. Load harvest_production_export.xlsx (baseline dates - optional, for context)
|
||||
2. Load ci_data_for_python.csv (recent CI data)
|
||||
3. For each field, extract last 300 days of history
|
||||
4. Run Model 307 inference on full sequence (last timestep probabilities)
|
||||
5. Export harvest_imminent_weekly.csv with probabilities
|
||||
1. Load harvest.xlsx to find last harvest date (season_end) per field
|
||||
2. Load ci_data_for_python.csv (complete CI data)
|
||||
3. For each field, extract all CI data AFTER last harvest (complete current season)
|
||||
4. Run Model 307 inference on full season sequence (last timestep probabilities)
|
||||
5. Export week_WW_YYYY.csv with probabilities
|
||||
|
||||
Output Columns:
|
||||
- field: Field ID
|
||||
|
|
@ -61,33 +62,34 @@ from harvest_date_pred_utils import (
|
|||
|
||||
|
||||
def load_harvest_dates(harvest_file):
|
||||
"""Load latest harvest end dates from Excel file (from harvest_production_export.xlsx)."""
|
||||
print("[1/5] Loading harvest dates...")
|
||||
"""Load last harvest end dates from harvest.xlsx (output from scripts 22+23)."""
|
||||
print("[1/5] Loading harvest data for season boundaries...")
|
||||
|
||||
if not Path(harvest_file).exists():
|
||||
print(f" ERROR: {harvest_file} not found")
|
||||
print(" Using 180-day lookback as default")
|
||||
print(f" harvest.xlsx is required to determine current season boundaries")
|
||||
return None
|
||||
|
||||
try:
|
||||
harvest_df = pd.read_excel(harvest_file)
|
||||
print(f" Loaded {len(harvest_df)} field-season records")
|
||||
print(f" Loaded {len(harvest_df)} season records")
|
||||
|
||||
# Use season_end_date column (output from harvest prediction script)
|
||||
harvest_df['season_end_date'] = pd.to_datetime(harvest_df['season_end_date'])
|
||||
# season_end contains the last harvest date for each season
|
||||
harvest_df['season_end'] = pd.to_datetime(harvest_df['season_end'])
|
||||
harvest_df['field'] = harvest_df['field'].astype(str).str.strip()
|
||||
|
||||
# Group by field and get the latest season_end_date
|
||||
# Group by field and get the LATEST season_end_date (most recent harvest)
|
||||
# This marks the start of the current season
|
||||
harvest_dates = {}
|
||||
for field_id, group in harvest_df.groupby('field'):
|
||||
latest_end = group['season_end_date'].max()
|
||||
harvest_dates[str(field_id).strip()] = latest_end
|
||||
latest_harvest = group['season_end'].max()
|
||||
harvest_dates[field_id] = latest_harvest
|
||||
|
||||
print(f" Successfully mapped {len(harvest_dates)} fields")
|
||||
print(f" Harvest end dates range: {min(harvest_dates.values()).date()} to {max(harvest_dates.values()).date()}")
|
||||
print(f" Last harvest dates range: {min(harvest_dates.values()).date()} to {max(harvest_dates.values()).date()}")
|
||||
return harvest_dates
|
||||
except Exception as e:
|
||||
print(f" ERROR loading harvest file: {e}")
|
||||
print(f" Using 180-day lookback instead")
|
||||
print(f" ERROR loading harvest.xlsx: {e}")
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -212,26 +214,37 @@ def main():
|
|||
# Get project name from command line or use default
|
||||
project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
|
||||
|
||||
# Construct paths
|
||||
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
|
||||
# Construct paths - work from either python_app/ or root smartcane/ directory
|
||||
# Try root first (laravel_app/...), then fall back to ../ (running from python_app/)
|
||||
if Path("laravel_app/storage/app").exists():
|
||||
base_storage = Path("laravel_app/storage/app") / project_name / "Data"
|
||||
else:
|
||||
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
|
||||
|
||||
ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
|
||||
CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
|
||||
harvest_data_dir = base_storage / "HarvestData"
|
||||
BASELINE_FILE = harvest_data_dir / "harvest_production_export.xlsx"
|
||||
OUTPUT_CSV = harvest_data_dir / "harvest_imminent_weekly.csv"
|
||||
harvest_data_dir.mkdir(parents=True, exist_ok=True) # Create if doesn't exist
|
||||
HARVEST_FILE = base_storage / "harvest.xlsx" # Output from scripts 22+23
|
||||
|
||||
# Determine week and year from current date for timestamped export
|
||||
today = datetime.now()
|
||||
week_num = int(today.strftime('%V'))
|
||||
year_num = int(today.strftime('%Y'))
|
||||
|
||||
# Output directory: reports/kpis/field_stats/
|
||||
reports_dir = base_storage.parent / "reports" / "kpis" / "field_stats"
|
||||
reports_dir.mkdir(parents=True, exist_ok=True)
|
||||
OUTPUT_CSV = reports_dir / f"{project_name}_harvest_imminent_week_{week_num:02d}_{year_num}.csv"
|
||||
|
||||
print("="*80)
|
||||
print(f"HARVEST IMMINENT PROBABILITY - WEEKLY MONITORING ({project_name})")
|
||||
print("="*80)
|
||||
|
||||
# [1] Load harvest dates (optional - for projects with predictions)
|
||||
harvest_dates = None
|
||||
if BASELINE_FILE.exists():
|
||||
harvest_dates = load_harvest_dates(BASELINE_FILE)
|
||||
else:
|
||||
print("[1/5] Loading harvest dates...")
|
||||
print(f" INFO: {BASELINE_FILE} not found (optional for weekly monitoring)")
|
||||
# [1] Load harvest dates (required to determine season boundaries)
|
||||
harvest_dates = load_harvest_dates(HARVEST_FILE)
|
||||
if harvest_dates is None or len(harvest_dates) == 0:
|
||||
print(f"ERROR: Cannot run without harvest.xlsx - required to determine current season boundaries")
|
||||
print(f" Please run scripts 22 (baseline prediction) and 23 (format conversion) first")
|
||||
return
|
||||
|
||||
# [2] Load CI data
|
||||
print(f"\n[2/5] Loading CI data...")
|
||||
|
|
@ -271,6 +284,9 @@ def main():
|
|||
|
||||
count = 0
|
||||
for field_id in ci_data['field'].unique():
|
||||
# Convert field_id to string for consistency
|
||||
field_id_str = str(field_id).strip()
|
||||
|
||||
# Get metadata
|
||||
meta = field_meta[field_meta['field'] == field_id]
|
||||
if len(meta) == 0:
|
||||
|
|
@ -279,18 +295,21 @@ def main():
|
|||
sub_field = meta['sub_field'].iloc[0]
|
||||
latest_date = meta['latest_date'].iloc[0]
|
||||
|
||||
# Use recent CI history (last 300 days from latest available data)
|
||||
# Get last harvest date for this field (start of current season)
|
||||
last_harvest = harvest_dates.get(field_id_str)
|
||||
if last_harvest is None:
|
||||
continue
|
||||
|
||||
# Extract all CI data AFTER last harvest (complete current season)
|
||||
field_data = ci_data[ci_data['field'] == field_id].copy()
|
||||
field_data = field_data.sort_values('Date')
|
||||
field_data = field_data[field_data['Date'] > last_harvest] # After last harvest
|
||||
|
||||
# Keep last 300 days of history for inference
|
||||
if len(field_data) > 300:
|
||||
field_data = field_data.iloc[-300:]
|
||||
|
||||
# Need at least 30 days of data since planting
|
||||
if len(field_data) < 30:
|
||||
continue
|
||||
|
||||
# Run inference on recent history to predict next 28 days
|
||||
# Run inference on full current season to predict next 28 days
|
||||
imminent_prob, detected_prob = run_inference_on_season(
|
||||
field_data, model, config, scalers, device, ci_column
|
||||
)
|
||||
|
|
@ -338,10 +357,11 @@ def main():
|
|||
print(f" WARNING: No results exported - check CI data availability")
|
||||
|
||||
print(f"\nStorage structure:")
|
||||
print(f" Input CI: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
|
||||
print(f" Input baseline: laravel_app/storage/app/{project_name}/Data/HarvestData/harvest_production_export.xlsx")
|
||||
print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
|
||||
print(f"\nReady to load into 09b field analysis report")
|
||||
print(f" Input harvest: laravel_app/storage/app/{project_name}/Data/harvest.xlsx")
|
||||
print(f" Input CI: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
|
||||
print(f" Output: laravel_app/storage/app/{project_name}/reports/kpis/field_stats/")
|
||||
print(f" Filename: {project_name}_harvest_imminent_week_{week_num:02d}_{year_num}.csv")
|
||||
print(f"\nReady to load into 80_calculate_kpis.R")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
288
python_app/batch_rgb_validation_top_fields.py
Normal file
288
python_app/batch_rgb_validation_top_fields.py
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Batch RGB Validation for Top 50 Largest Fields
|
||||
|
||||
Generates 5x3 RGB temporal grids for the latest complete harvest season of the 50 largest fields.
|
||||
Uses actual season_end dates from harvest.xlsx for visual validation of field conditions at harvest.
|
||||
|
||||
Configuration:
|
||||
- GeoJSON: pivot.geojson (defines field boundaries and sizes)
|
||||
- Harvest data: harvest.xlsx (season_end dates for completed harvests)
|
||||
- CI data: ci_data_for_python.csv
|
||||
- Output: RGB directory with field_name_YYYYMMDD_harvest_rgb.png
|
||||
|
||||
Usage:
|
||||
python batch_rgb_validation_top_fields.py
|
||||
|
||||
Output:
|
||||
- Saves 5x3 RGB grids to: laravel_app/storage/app/angata/RGB/
|
||||
- Filenames: field_<NAME>_<YYYYMMDD>_harvest_rgb.png
|
||||
- Each grid shows 15 images at 7-day intervals around the season_end date
|
||||
"""
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from rgb_visualization import generate_rgb_grids
|
||||
|
||||
|
||||
def load_geojson_and_calculate_areas(geojson_path):
|
||||
"""
|
||||
Load GeoJSON and calculate area for each field.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: Columns [field, field_name, area_m2] sorted by area descending
|
||||
"""
|
||||
geojson_path = Path(geojson_path)
|
||||
|
||||
if not geojson_path.exists():
|
||||
print(f"✗ GeoJSON not found: {geojson_path}")
|
||||
return None
|
||||
|
||||
print(f"Loading GeoJSON: {geojson_path}")
|
||||
|
||||
with open(geojson_path) as f:
|
||||
geojson_data = json.load(f)
|
||||
|
||||
fields = []
|
||||
|
||||
for feature in geojson_data.get('features', []):
|
||||
props = feature.get('properties', {})
|
||||
field_id = str(props.get('field', ''))
|
||||
field_name = props.get('name', f"field_{field_id}")
|
||||
|
||||
geometry = feature.get('geometry', {})
|
||||
geom_type = geometry.get('type', '')
|
||||
coordinates = geometry.get('coordinates', [])
|
||||
|
||||
# Simple area calculation using Shoelace formula
|
||||
area_m2 = 0
|
||||
if geom_type == 'Polygon' and coordinates:
|
||||
coords = coordinates[0] # Exterior ring
|
||||
area_m2 = calculate_polygon_area(coords)
|
||||
elif geom_type == 'MultiPolygon' and coordinates:
|
||||
for poly_coords in coordinates:
|
||||
area_m2 += calculate_polygon_area(poly_coords[0])
|
||||
|
||||
if area_m2 > 0:
|
||||
fields.append({
|
||||
'field': field_id,
|
||||
'field_name': field_name,
|
||||
'area_m2': area_m2,
|
||||
'area_hectares': area_m2 / 10000
|
||||
})
|
||||
|
||||
df = pd.DataFrame(fields)
|
||||
df = df.sort_values('area_m2', ascending=False)
|
||||
|
||||
print(f" ✓ Loaded {len(df)} fields")
|
||||
print(f" Top 10 largest fields (hectares):")
|
||||
for i, row in df.head(10).iterrows():
|
||||
print(f" {row['field_name']:30s} ({row['field']:>6s}): {row['area_hectares']:>8.2f} ha")
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def calculate_polygon_area(coords):
|
||||
"""
|
||||
Calculate area of polygon using Shoelace formula (in m²).
|
||||
Assumes coordinates are in lat/lon (roughly converts to meters).
|
||||
"""
|
||||
if len(coords) < 3:
|
||||
return 0
|
||||
|
||||
# Rough conversion: at equator, 1 degree ≈ 111 km
|
||||
# For lat/lon coordinates, use average latitude
|
||||
lats = [c[1] for c in coords]
|
||||
avg_lat = np.mean(lats)
|
||||
lat_m_per_deg = 111000
|
||||
lon_m_per_deg = 111000 * np.cos(np.radians(avg_lat))
|
||||
|
||||
# Convert to meters
|
||||
coords_m = []
|
||||
for lon, lat in coords:
|
||||
x = (lon - coords[0][0]) * lon_m_per_deg
|
||||
y = (lat - coords[0][1]) * lat_m_per_deg
|
||||
coords_m.append((x, y))
|
||||
|
||||
# Shoelace formula
|
||||
area = 0
|
||||
for i in range(len(coords_m)):
|
||||
j = (i + 1) % len(coords_m)
|
||||
area += coords_m[i][0] * coords_m[j][1]
|
||||
area -= coords_m[j][0] * coords_m[i][1]
|
||||
|
||||
return abs(area) / 2
|
||||
|
||||
|
||||
def load_harvest_dates_from_xlsx(harvest_xlsx_path, top_50_fields_df):
|
||||
"""
|
||||
Load harvest data from Excel file and get latest completed season for each field.
|
||||
|
||||
Returns season_end date for each field (latest complete season where season_end is not null).
|
||||
|
||||
Args:
|
||||
harvest_xlsx_path (Path): Path to harvest.xlsx
|
||||
top_50_fields_df (pd.DataFrame): DataFrame with 'field' column for filtering
|
||||
|
||||
Returns:
|
||||
dict: {field_id: {'field_name': str, 'harvest_date': pd.Timestamp}}
|
||||
"""
|
||||
harvest_xlsx_path = Path(harvest_xlsx_path)
|
||||
|
||||
if not harvest_xlsx_path.exists():
|
||||
print(f"✗ Harvest Excel file not found: {harvest_xlsx_path}")
|
||||
return {}
|
||||
|
||||
print(f"Loading harvest data: {harvest_xlsx_path}")
|
||||
|
||||
try:
|
||||
harvest_df = pd.read_excel(harvest_xlsx_path)
|
||||
|
||||
# Ensure date columns are datetime
|
||||
if 'season_end' in harvest_df.columns:
|
||||
harvest_df['season_end'] = pd.to_datetime(harvest_df['season_end'], errors='coerce')
|
||||
|
||||
# Filter to top 50 fields and get only rows with season_end filled in
|
||||
top_50_field_ids = set(top_50_fields_df['field'].astype(str).str.strip())
|
||||
harvest_df['field'] = harvest_df['field'].astype(str).str.strip()
|
||||
harvest_df = harvest_df[harvest_df['field'].isin(top_50_field_ids)]
|
||||
harvest_df = harvest_df[harvest_df['season_end'].notna()]
|
||||
|
||||
# Group by field and get the LATEST (most recent) season_end
|
||||
latest_harvests = {}
|
||||
|
||||
for field_id in top_50_field_ids:
|
||||
field_records = harvest_df[harvest_df['field'] == field_id]
|
||||
|
||||
if len(field_records) > 0:
|
||||
# Get row with latest season_end
|
||||
latest_idx = field_records['season_end'].idxmax()
|
||||
latest_row = field_records.loc[latest_idx]
|
||||
|
||||
# Get field name from top_50_fields_df
|
||||
field_info = top_50_fields_df[top_50_fields_df['field'] == field_id]
|
||||
if len(field_info) > 0:
|
||||
field_name = field_info.iloc[0]['field_name']
|
||||
else:
|
||||
field_name = f"field_{field_id}"
|
||||
|
||||
latest_harvests[field_id] = {
|
||||
'field_name': field_name,
|
||||
'harvest_date': latest_row['season_end']
|
||||
}
|
||||
|
||||
print(f" ✓ Loaded latest complete seasons for {len(latest_harvests)} fields")
|
||||
|
||||
return latest_harvests
|
||||
|
||||
except Exception as e:
|
||||
print(f"✗ Error loading harvest data: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
def main():
|
||||
print("="*90)
|
||||
print("BATCH RGB VALIDATION - TOP 50 LARGEST FIELDS")
|
||||
print("Visual inspection of latest harvest dates from harvest.xlsx using RGB imagery")
|
||||
print("="*90)
|
||||
|
||||
# Configuration
|
||||
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
|
||||
harvest_xlsx = Path("laravel_app/storage/app/angata/Data/harvest.xlsx")
|
||||
output_dir = Path("laravel_app/storage/app/angata/RGB")
|
||||
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
|
||||
|
||||
# Verify paths
|
||||
if not geojson_path.exists():
|
||||
print(f"✗ GeoJSON not found: {geojson_path}")
|
||||
return
|
||||
if not harvest_xlsx.exists():
|
||||
print(f"✗ Harvest Excel not found: {harvest_xlsx}")
|
||||
return
|
||||
if not tiff_dir.exists():
|
||||
print(f"✗ TIFF directory not found: {tiff_dir}")
|
||||
return
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Step 1: Load GeoJSON and get top 50 largest fields
|
||||
print("\n[1/4] Loading GeoJSON and identifying top 50 largest fields...")
|
||||
fields_df = load_geojson_and_calculate_areas(geojson_path)
|
||||
if fields_df is None:
|
||||
return
|
||||
|
||||
top_50_fields = fields_df.head(50)
|
||||
print(f" ✓ Selected {len(top_50_fields)} largest fields for processing")
|
||||
|
||||
# Step 2: Load harvest dates from Excel
|
||||
print("\n[2/4] Loading harvest dates from Excel (latest complete seasons)...")
|
||||
harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_50_fields)
|
||||
|
||||
if len(harvest_dates) == 0:
|
||||
print("✗ No harvest dates found in Excel file")
|
||||
return
|
||||
|
||||
print(f" ✓ Found {len(harvest_dates)} fields with completed seasons")
|
||||
for field_id, info in list(harvest_dates.items())[:5]:
|
||||
print(f" - {info['field_name']:30s}: {info['harvest_date'].strftime('%Y-%m-%d')}")
|
||||
if len(harvest_dates) > 5:
|
||||
print(f" ... and {len(harvest_dates) - 5} more")
|
||||
|
||||
# Step 3: Generate RGB grids for each field
|
||||
print("\n[3/4] Generating RGB validation grids...")
|
||||
rgb_count = 0
|
||||
|
||||
for idx, (field_id, harvest_info) in enumerate(harvest_dates.items(), 1):
|
||||
field_name = harvest_info['field_name']
|
||||
harvest_date = harvest_info['harvest_date']
|
||||
|
||||
try:
|
||||
# Run RGB visualization (harvest dates only, no registered/predicted distinction)
|
||||
results = generate_rgb_grids(
|
||||
field_data=None, # Not needed - just for function compatibility
|
||||
field_id=field_id,
|
||||
registered_harvest_dates=[], # Empty - using harvest.xlsx instead
|
||||
predicted_harvest_dates=[
|
||||
{
|
||||
'harvest_date': harvest_date,
|
||||
'model_name': 'harvest_xlsx'
|
||||
}
|
||||
],
|
||||
output_dir=str(output_dir), # All PNGs in same folder
|
||||
tiff_dir=str(tiff_dir),
|
||||
geojson_path=str(geojson_path)
|
||||
)
|
||||
|
||||
if results['predicted']:
|
||||
rgb_count += 1
|
||||
print(f" [{idx:2d}/{len(harvest_dates)}] {field_name}: ✓ {harvest_date.strftime('%Y-%m-%d')}")
|
||||
else:
|
||||
print(f" [{idx:2d}/{len(harvest_dates)}] {field_name}: ⚠ No RGB grid (no imagery available)")
|
||||
|
||||
except Exception as e:
|
||||
print(f" [{idx:2d}/{len(harvest_dates)}] {field_name}: ✗ Error - {e}")
|
||||
|
||||
# Summary
|
||||
print("\n" + "="*90)
|
||||
print(f"SUMMARY:")
|
||||
print(f" Fields with harvest dates: {len(harvest_dates)}")
|
||||
print(f" RGB grids generated: {rgb_count}/{len(harvest_dates)}")
|
||||
print(f" Output directory: {output_dir}")
|
||||
print("="*90)
|
||||
print("\nVisual inspection checklist:")
|
||||
print(" ✓ Brown/bare soil at T~0d (harvest date) = Field properly harvested")
|
||||
print(" ⚠ Green vegetation at T~0d = Possible data error or replanting")
|
||||
print(" ✓ Green → Brown progression = Normal harvest sequence")
|
||||
print("="*90)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
107
python_app/debug_all_tiles_for_date.py
Normal file
107
python_app/debug_all_tiles_for_date.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Debug script to find all tiles for a date and check which overlap with field boundary
|
||||
"""
|
||||
|
||||
import json
|
||||
import rasterio
|
||||
from rasterio.mask import mask
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import shapely.geometry as shgeom
|
||||
import pandas as pd
|
||||
|
||||
# Load field 79 boundary
|
||||
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
|
||||
field_id = "79"
|
||||
|
||||
print(f"Loading field {field_id} from GeoJSON...")
|
||||
with open(geojson_path) as f:
|
||||
geojson_data = json.load(f)
|
||||
|
||||
field_boundary = None
|
||||
for feature in geojson_data.get('features', []):
|
||||
props = feature.get('properties', {})
|
||||
if str(props.get('field', '')) == str(field_id):
|
||||
geometry = feature.get('geometry')
|
||||
if geometry:
|
||||
geom_type = geometry.get('type', '')
|
||||
coordinates = geometry.get('coordinates', [])
|
||||
|
||||
if geom_type == 'MultiPolygon':
|
||||
if coordinates and len(coordinates) > 0:
|
||||
coords = coordinates[0][0]
|
||||
field_boundary = shgeom.Polygon(coords)
|
||||
elif geom_type == 'Polygon':
|
||||
if coordinates and len(coordinates) > 0:
|
||||
coords = coordinates[0]
|
||||
field_boundary = shgeom.Polygon(coords)
|
||||
break
|
||||
|
||||
if field_boundary is None:
|
||||
print(f"Field {field_id} not found")
|
||||
exit(1)
|
||||
|
||||
print(f"Field boundary bounds: {field_boundary.bounds}")
|
||||
print(f"Field boundary area: {field_boundary.area}")
|
||||
|
||||
# Find a specific date directory
|
||||
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
|
||||
target_date = pd.Timestamp("2026-01-15") # Use a recent date that exists
|
||||
|
||||
# Find tiles for that date
|
||||
date_dirs = []
|
||||
for date_dir in tiff_dir.iterdir():
|
||||
if date_dir.is_dir():
|
||||
try:
|
||||
dir_name = date_dir.name
|
||||
date_str = dir_name.split('_')[0]
|
||||
tile_date = pd.Timestamp(date_str)
|
||||
if tile_date == target_date:
|
||||
date_dirs.append(date_dir)
|
||||
except:
|
||||
pass
|
||||
|
||||
if not date_dirs:
|
||||
print(f"No tiles found for {target_date}")
|
||||
exit(1)
|
||||
|
||||
print(f"\nFound {len(date_dirs)} date directory(ies) for {target_date}")
|
||||
|
||||
for date_dir in date_dirs:
|
||||
print(f"\n=== Checking date directory: {date_dir.name} ===")
|
||||
|
||||
tiles = list(date_dir.glob("*.tif"))
|
||||
print(f"Found {len(tiles)} tiles in this directory")
|
||||
|
||||
for tile_path in sorted(tiles):
|
||||
try:
|
||||
with rasterio.open(tile_path) as src:
|
||||
tile_bounds = src.bounds
|
||||
tile_geom = shgeom.box(*tile_bounds)
|
||||
|
||||
intersects = field_boundary.intersects(tile_geom)
|
||||
intersection = field_boundary.intersection(tile_geom) if intersects else None
|
||||
intersection_area = intersection.area if intersection else 0
|
||||
|
||||
print(f"\n{tile_path.name}")
|
||||
print(f" Tile bounds: {tile_bounds}")
|
||||
print(f" Intersects field: {intersects}")
|
||||
if intersects:
|
||||
print(f" Intersection area: {intersection_area:.8f}")
|
||||
|
||||
# Try to mask this tile
|
||||
geom = shgeom.mapping(field_boundary)
|
||||
try:
|
||||
masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
|
||||
print(f" ✓ Successfully masked! Shape: {masked_data.shape}")
|
||||
|
||||
# Check the data in each band
|
||||
for i, band_idx in enumerate([1, 2, 3]):
|
||||
band_data = masked_data[i]
|
||||
non_zero = (band_data != 0).sum()
|
||||
print(f" Band {band_idx}: {non_zero} non-zero pixels out of {band_data.size}")
|
||||
except ValueError as e:
|
||||
print(f" ✗ Masking failed: {e}")
|
||||
except Exception as e:
|
||||
print(f" Error reading tile: {e}")
|
||||
102
python_app/debug_field_mask.py
Normal file
102
python_app/debug_field_mask.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Debug script to diagnose why field boundary masking produces no data
|
||||
"""
|
||||
|
||||
import json
|
||||
import rasterio
|
||||
from rasterio.mask import mask
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import shapely.geometry as shgeom
|
||||
|
||||
# Load a sample field boundary
|
||||
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
|
||||
field_id = "79" # A field that had issues
|
||||
|
||||
print(f"Loading field {field_id} from GeoJSON...")
|
||||
with open(geojson_path) as f:
|
||||
geojson_data = json.load(f)
|
||||
|
||||
field_boundary = None
|
||||
for feature in geojson_data.get('features', []):
|
||||
props = feature.get('properties', {})
|
||||
if str(props.get('field', '')) == str(field_id):
|
||||
geometry = feature.get('geometry')
|
||||
if geometry:
|
||||
geom_type = geometry.get('type', '')
|
||||
coordinates = geometry.get('coordinates', [])
|
||||
|
||||
if geom_type == 'MultiPolygon':
|
||||
if coordinates and len(coordinates) > 0:
|
||||
coords = coordinates[0][0]
|
||||
field_boundary = shgeom.Polygon(coords)
|
||||
elif geom_type == 'Polygon':
|
||||
if coordinates and len(coordinates) > 0:
|
||||
coords = coordinates[0]
|
||||
field_boundary = shgeom.Polygon(coords)
|
||||
break
|
||||
|
||||
if field_boundary is None:
|
||||
print(f"Field {field_id} not found")
|
||||
exit(1)
|
||||
|
||||
print(f"Field boundary bounds: {field_boundary.bounds}")
|
||||
print(f"Field boundary area: {field_boundary.area}")
|
||||
|
||||
# Load a sample TIFF tile
|
||||
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
|
||||
tile_file = None
|
||||
for date_dir in sorted(tiff_dir.iterdir()):
|
||||
if date_dir.is_dir():
|
||||
for tif in date_dir.glob("*.tif"):
|
||||
if tif.stat().st_size > 12e6:
|
||||
tile_file = tif
|
||||
break
|
||||
if tile_file:
|
||||
break
|
||||
|
||||
if not tile_file:
|
||||
print("No suitable TIFF found")
|
||||
exit(1)
|
||||
|
||||
print(f"\nTesting with TIFF: {tile_file.name}")
|
||||
|
||||
with rasterio.open(tile_file) as src:
|
||||
print(f"TIFF Bounds: {src.bounds}")
|
||||
print(f"TIFF CRS: {src.crs}")
|
||||
|
||||
# Check if field boundary is within tile bounds
|
||||
tile_box = shgeom.box(*src.bounds)
|
||||
intersects = field_boundary.intersects(tile_box)
|
||||
print(f"Field boundary intersects tile: {intersects}")
|
||||
|
||||
if intersects:
|
||||
intersection = field_boundary.intersection(tile_box)
|
||||
print(f"Intersection area: {intersection.area}")
|
||||
print(f"Intersection bounds: {intersection.bounds}")
|
||||
|
||||
# Try to mask and see what we get
|
||||
print("\nAttempting to mask...")
|
||||
geom = shgeom.mapping(field_boundary)
|
||||
try:
|
||||
masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
|
||||
print(f"Masked data shape: {masked_data.shape}")
|
||||
print(f"Masked data dtype: {masked_data.dtype}")
|
||||
|
||||
# Check the data
|
||||
for i, band_idx in enumerate([1, 2, 3]):
|
||||
band_data = masked_data[i]
|
||||
print(f"\nBand {band_idx}:")
|
||||
print(f" min: {np.nanmin(band_data):.6f}")
|
||||
print(f" max: {np.nanmax(band_data):.6f}")
|
||||
print(f" mean: {np.nanmean(band_data):.6f}")
|
||||
print(f" % valid (non-zero): {(band_data != 0).sum() / band_data.size * 100:.2f}%")
|
||||
print(f" % NaN: {np.isnan(band_data).sum() / band_data.size * 100:.2f}%")
|
||||
|
||||
# Show sample values
|
||||
valid_pixels = band_data[band_data != 0]
|
||||
if len(valid_pixels) > 0:
|
||||
print(f" Sample valid values: {valid_pixels[:10]}")
|
||||
except ValueError as e:
|
||||
print(f"Error during masking: {e}")
|
||||
47
python_app/debug_tiff_inspect.py
Normal file
47
python_app/debug_tiff_inspect.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Debug script to inspect TIFF file structure and data
|
||||
"""
|
||||
|
||||
import rasterio
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
|
||||
# Pick a tile file to inspect
|
||||
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
|
||||
|
||||
# Find first available tile
|
||||
tile_file = None
|
||||
for date_dir in sorted(tiff_dir.iterdir()):
|
||||
if date_dir.is_dir():
|
||||
for tif in date_dir.glob("*.tif"):
|
||||
if tif.stat().st_size > 12e6: # Skip empty files
|
||||
tile_file = tif
|
||||
break
|
||||
if tile_file:
|
||||
break
|
||||
|
||||
if not tile_file:
|
||||
print("No suitable TIFF files found")
|
||||
exit(1)
|
||||
|
||||
print(f"Inspecting: {tile_file.name}")
|
||||
print("=" * 80)
|
||||
|
||||
with rasterio.open(tile_file) as src:
|
||||
print(f"Band count: {src.count}")
|
||||
print(f"Data type: {src.dtypes[0]}")
|
||||
print(f"Shape: {src.height} x {src.width}")
|
||||
print(f"CRS: {src.crs}")
|
||||
print(f"Bounds: {src.bounds}")
|
||||
print()
|
||||
|
||||
# Read each band
|
||||
for band_idx in range(1, min(6, src.count + 1)):
|
||||
data = src.read(band_idx)
|
||||
print(f"Band {band_idx}:")
|
||||
print(f" dtype: {data.dtype}")
|
||||
print(f" range: {data.min():.6f} - {data.max():.6f}")
|
||||
print(f" mean: {data.mean():.6f}")
|
||||
print(f" % valid (non-zero): {(data != 0).sum() / data.size * 100:.1f}%")
|
||||
print()
|
||||
|
|
@ -0,0 +1,299 @@
|
|||
"""
|
||||
Batch Field Visualization Tool - RGB Imagery Around Harvest Date
|
||||
Purpose: Generate visual validation using RGB satellite imagery samples around
|
||||
predicted harvest date to verify predictions (bare soil = harvested, green = not harvested)
|
||||
|
||||
Shows 12-15 RGB images in a grid, centered around the predicted harvest date
|
||||
|
||||
Usage:
|
||||
python batch_plot_fields_rgb.py field1,field2,field3
|
||||
python batch_plot_fields_rgb.py 10125,88,97
|
||||
|
||||
Or read from CSV:
|
||||
python batch_plot_fields_rgb.py --file fields_to_check.csv
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import torch
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import sys
|
||||
import rasterio
|
||||
from rasterio.mask import mask
|
||||
import geopandas as gpd
|
||||
from harvest_date_pred_utils import load_model_and_config, extract_features
|
||||
|
||||
|
||||
def get_field_centroid(field_id, geojson_path="pivot.geojson"):
|
||||
"""Get centroid of field from GeoJSON for cropping RGB images."""
|
||||
try:
|
||||
gdf = gpd.read_file(geojson_path)
|
||||
field_geom = gdf[gdf['field'] == str(field_id)]
|
||||
if len(field_geom) > 0:
|
||||
centroid = field_geom.geometry.iloc[0].centroid
|
||||
return (centroid.x, centroid.y)
|
||||
except Exception as e:
|
||||
print(f" Warning: Could not get field centroid - {e}")
|
||||
return None
|
||||
|
||||
|
||||
def load_rgb_image(tif_path, field_id=None, geojson_path="pivot.geojson"):
|
||||
"""
|
||||
Load RGB bands from 8-band GeoTIFF
|
||||
Bands: 0=coastal, 1=blue, 2=green, 3=green_i, 4=yellow, 5=red, 6=rededge, 7=nir
|
||||
RGB = bands 5,3,1 (Red, Green, Blue)
|
||||
"""
|
||||
try:
|
||||
with rasterio.open(tif_path) as src:
|
||||
# Read RGB bands (bands are 1-indexed in rasterio)
|
||||
red = src.read(6) # Band 6 = red (0-indexed band 5)
|
||||
green = src.read(3) # Band 3 = green (0-indexed band 2)
|
||||
blue = src.read(2) # Band 2 = blue (0-indexed band 1)
|
||||
|
||||
# Stack into RGB image
|
||||
rgb = np.stack([red, green, blue], axis=2)
|
||||
|
||||
# Normalize to 0-1 range (8-band data is typically 0-10000)
|
||||
rgb = np.clip(rgb / 5000.0, 0, 1)
|
||||
|
||||
return rgb
|
||||
except Exception as e:
|
||||
print(f" Error loading RGB from {tif_path}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def plot_field_rgb_validation(field_id, ci_data, model, config, scalers, device,
|
||||
tif_folder="../../../laravel_app/storage/app/angata/merged_tif_8b",
|
||||
output_dir="validation_plots_rgb"):
|
||||
"""
|
||||
Create validation plot for a single field:
|
||||
- Top: Harvest probability over time with peak marked
|
||||
- Bottom: 12-15 RGB images in grid around predicted harvest date
|
||||
"""
|
||||
# Create output directory
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Filter field data
|
||||
field_data = ci_data[ci_data['field'] == field_id].copy()
|
||||
if len(field_data) == 0:
|
||||
print(f" ✗ Field {field_id}: No CI data found")
|
||||
return False
|
||||
|
||||
field_data = field_data.sort_values('Date')
|
||||
print(f" ✓ Field {field_id}: {len(field_data)} days of data")
|
||||
|
||||
try:
|
||||
# Extract features and run inference
|
||||
ci_column = config['data']['ci_column']
|
||||
feature_names = config['features']
|
||||
|
||||
feat_array = extract_features(field_data, feature_names, ci_column=ci_column)
|
||||
if feat_array is None:
|
||||
print(f" ✗ Field {field_id}: Feature extraction failed")
|
||||
return False
|
||||
|
||||
# Apply scalers
|
||||
if isinstance(scalers, dict) and 'features' in scalers:
|
||||
feat_array = scalers['features'].transform(feat_array)
|
||||
|
||||
# Run inference
|
||||
with torch.no_grad():
|
||||
x_tensor = torch.tensor(feat_array, dtype=torch.float32).unsqueeze(0).to(device)
|
||||
out_imm, out_det = model(x_tensor)
|
||||
imm_probs = out_imm.squeeze(0).cpu().numpy()
|
||||
|
||||
# Find peak probability (predicted harvest date)
|
||||
peak_idx = np.argmax(imm_probs)
|
||||
peak_date = field_data['Date'].iloc[peak_idx]
|
||||
peak_prob = imm_probs[peak_idx]
|
||||
|
||||
print(f" Peak probability: {peak_prob:.3f} on {peak_date.strftime('%Y-%m-%d')}")
|
||||
|
||||
# Get date range: ±6 days around peak (12-13 images total)
|
||||
date_range = field_data['Date'].dt.date
|
||||
peak_date_only = peak_date.date() if hasattr(peak_date, 'date') else peak_date
|
||||
|
||||
days_before = 6
|
||||
days_after = 6
|
||||
start_date = peak_date_only - timedelta(days=days_before)
|
||||
end_date = peak_date_only + timedelta(days=days_after)
|
||||
|
||||
# Find available TIF files in date range
|
||||
tif_folder_path = Path(tif_folder)
|
||||
available_dates = []
|
||||
for tif_file in sorted(tif_folder_path.glob("*.tif")):
|
||||
date_str = tif_file.stem # YYYY-MM-DD
|
||||
try:
|
||||
tif_date = datetime.strptime(date_str, "%Y-%m-%d").date()
|
||||
if start_date <= tif_date <= end_date:
|
||||
available_dates.append((tif_date, tif_file))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if len(available_dates) == 0:
|
||||
print(f" Warning: No TIF files found in {start_date} to {end_date}")
|
||||
return False
|
||||
|
||||
print(f" Found {len(available_dates)} RGB images in date range")
|
||||
|
||||
# Load RGB images
|
||||
rgb_images = []
|
||||
rgb_dates = []
|
||||
for tif_date, tif_file in available_dates:
|
||||
rgb = load_rgb_image(str(tif_file), field_id)
|
||||
if rgb is not None:
|
||||
rgb_images.append(rgb)
|
||||
rgb_dates.append(tif_date)
|
||||
|
||||
if len(rgb_images) == 0:
|
||||
print(f" ✗ No RGB images loaded")
|
||||
return False
|
||||
|
||||
print(f" Loaded {len(rgb_images)} RGB images")
|
||||
|
||||
# Create figure with probability plot + RGB grid
|
||||
n_images = len(rgb_images)
|
||||
n_cols = min(5, n_images) # Max 5 columns
|
||||
n_rows = (n_images + n_cols - 1) // n_cols # Calculate rows needed
|
||||
|
||||
fig = plt.figure(figsize=(18, 12))
|
||||
|
||||
# Probability plot (top, spanning all columns)
|
||||
ax_prob = plt.subplot(n_rows + 1, n_cols, (1, n_cols))
|
||||
dates_arr = field_data['Date'].values
|
||||
ax_prob.plot(dates_arr, imm_probs, '-', color='orange', linewidth=2.5, label='Imminent Probability', alpha=0.8)
|
||||
ax_prob.axhline(y=0.5, color='red', linestyle='--', linewidth=1.5, alpha=0.5, label='Threshold (0.5)')
|
||||
ax_prob.axvline(x=peak_date, color='darkred', linestyle=':', linewidth=2, alpha=0.7, label='Peak')
|
||||
ax_prob.fill_between(dates_arr, 0.5, 1.0, alpha=0.08, color='red')
|
||||
ax_prob.set_ylim(-0.05, 1.05)
|
||||
ax_prob.set_ylabel('Probability', fontsize=11, fontweight='bold')
|
||||
ax_prob.set_xlabel('Date', fontsize=11, fontweight='bold')
|
||||
ax_prob.set_title(f'Field {field_id} - Model 307 Harvest Probability', fontsize=12, fontweight='bold')
|
||||
ax_prob.grid(True, alpha=0.3)
|
||||
ax_prob.legend(loc='upper right', fontsize=9)
|
||||
ax_prob.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
|
||||
ax_prob.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
||||
plt.setp(ax_prob.xaxis.get_majorticklabels(), rotation=45, ha='right')
|
||||
|
||||
# Annotate peak
|
||||
ax_prob.annotate(f'{peak_prob:.2f}\n{peak_date_only}',
|
||||
xy=(peak_date, peak_prob),
|
||||
xytext=(20, 20), textcoords='offset points',
|
||||
bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.8),
|
||||
arrowprops=dict(arrowstyle='->', lw=1.5, color='darkred'))
|
||||
|
||||
# RGB images in grid (below probability plot)
|
||||
for i, (rgb, date) in enumerate(zip(rgb_images, rgb_dates)):
|
||||
ax = plt.subplot(n_rows + 1, n_cols, n_cols + i + 1)
|
||||
ax.imshow(rgb, extent=[0, 100, 0, 100])
|
||||
|
||||
# Highlight peak date
|
||||
date_label = date.strftime('%m-%d')
|
||||
is_peak = date == peak_date_only
|
||||
color = 'darkred' if is_peak else 'black'
|
||||
weight = 'bold' if is_peak else 'normal'
|
||||
size = 11 if is_peak else 9
|
||||
|
||||
ax.set_title(date_label, fontsize=size, fontweight=weight, color=color)
|
||||
ax.set_xticks([])
|
||||
ax.set_yticks([])
|
||||
|
||||
plt.suptitle(f'Field {field_id} RGB Imagery: {len(rgb_images)} Days Around Peak Harvest Probability\nPeak: {peak_prob:.2f} on {peak_date_only} | Green = Growing | Brown/Bare = Harvested',
|
||||
fontsize=13, fontweight='bold', y=0.995)
|
||||
plt.tight_layout()
|
||||
|
||||
# Save
|
||||
output_file = Path(output_dir) / f"field_{field_id}_rgb_validation.png"
|
||||
plt.savefig(output_file, dpi=100, bbox_inches='tight')
|
||||
print(f" ✓ Saved: {output_file}")
|
||||
plt.close()
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f" ✗ Field {field_id}: Error - {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
print("="*80)
|
||||
print("BATCH RGB VISUALIZATION TOOL")
|
||||
print("Visual check: RGB imagery around predicted harvest date")
|
||||
print("="*80)
|
||||
|
||||
# Parse arguments
|
||||
fields_to_plot = []
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("\nUsage:")
|
||||
print(" python batch_plot_fields_rgb.py field1,field2,field3")
|
||||
print(" python batch_plot_fields_rgb.py --file fields.csv")
|
||||
print("\nExample:")
|
||||
print(" python batch_plot_fields_rgb.py 10125,88,97,440")
|
||||
return
|
||||
|
||||
if sys.argv[1] == "--file":
|
||||
if len(sys.argv) < 3:
|
||||
print("ERROR: --file requires a CSV filename")
|
||||
return
|
||||
csv_file = sys.argv[2]
|
||||
print(f"\n[1/4] Loading fields from CSV: {csv_file}")
|
||||
try:
|
||||
df = pd.read_csv(csv_file)
|
||||
fields_to_plot = df['field'].astype(str).str.strip().tolist()
|
||||
print(f" ✓ Loaded {len(fields_to_plot)} fields")
|
||||
except Exception as e:
|
||||
print(f" ✗ Error reading CSV: {e}")
|
||||
return
|
||||
else:
|
||||
# Parse comma-separated list
|
||||
fields_to_plot = [f.strip() for f in sys.argv[1].split(',')]
|
||||
print(f"\n[1/4] Processing {len(fields_to_plot)} field(s): {', '.join(fields_to_plot)}")
|
||||
|
||||
# Load CI data
|
||||
print("\n[2/4] Loading CI data...")
|
||||
try:
|
||||
ci_data = pd.read_csv("ci_data_for_python.csv")
|
||||
ci_data['Date'] = pd.to_datetime(ci_data['Date'])
|
||||
ci_data['field'] = ci_data['field'].astype(str).str.strip()
|
||||
print(f" ✓ Loaded {len(ci_data)} observations for {ci_data['field'].nunique()} fields")
|
||||
except Exception as e:
|
||||
print(f" ✗ Error loading CI data: {e}")
|
||||
return
|
||||
|
||||
# Load model
|
||||
print("\n[3/4] Loading model...")
|
||||
try:
|
||||
model, config, scalers = load_model_and_config(Path("."))
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
model.eval()
|
||||
print(f" ✓ Model loaded on {device}")
|
||||
except Exception as e:
|
||||
print(f" ✗ Error loading model: {e}")
|
||||
return
|
||||
|
||||
# Process each field
|
||||
print("\n[4/4] Generating RGB validation plots...")
|
||||
success_count = 0
|
||||
for field_id in fields_to_plot:
|
||||
if plot_field_rgb_validation(field_id, ci_data, model, config, scalers, device):
|
||||
success_count += 1
|
||||
|
||||
# Summary
|
||||
print("\n" + "="*80)
|
||||
print(f"SUMMARY: {success_count}/{len(fields_to_plot)} fields processed successfully")
|
||||
print(f"Output directory: validation_plots_rgb/")
|
||||
print("="*80)
|
||||
print("\nInspect the PNG files to verify predictions:")
|
||||
print(" ✓ Green imagery BEFORE peak date (field growing)")
|
||||
print(" ✓ Brown/Bare imagery AT/AFTER peak date (harvested)")
|
||||
print(" ✓ Peak date marked with red title")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -22,6 +22,8 @@ import numpy as np
|
|||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import matplotlib
|
||||
matplotlib.use('Agg') # Use non-interactive backend to avoid display hangs
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
from matplotlib.colors import Normalize
|
||||
|
|
@ -86,55 +88,95 @@ def load_field_boundaries(geojson_path, field_id):
|
|||
return None, None
|
||||
|
||||
|
||||
def find_closest_tiff(target_date, tiff_dir, days_window=60, field_boundary=None):
|
||||
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60):
|
||||
"""
|
||||
Find available TIFF file closest to target date.
|
||||
Skips obviously empty files (< 12 MB) without reading data.
|
||||
Find all tile files for target_date (or closest date) that overlap with field_boundary.
|
||||
|
||||
TIFF files are named: YYYY-MM-DD.tif
|
||||
Tile files are organized in subdirectories by date: 5x5/YYYY-MM-DD_HH/*.tif
|
||||
|
||||
Args:
|
||||
target_date (pd.Timestamp): Target date to find TIFF near
|
||||
tiff_dir (Path): Directory containing TIFF files
|
||||
target_date (pd.Timestamp): Target date to find tiles near
|
||||
tiff_dir (Path): Directory containing 5x5 date subdirectories
|
||||
field_boundary (shapely.Polygon): Field boundary for overlap detection
|
||||
days_window (int): Max days to search before/after target
|
||||
field_boundary (shapely.Polygon): Unused, kept for compatibility
|
||||
|
||||
Returns:
|
||||
Path: Path to closest TIFF file or None if not found
|
||||
int: Days difference from target (negative=before, positive=after)
|
||||
tuple: (list of tile paths, actual_date, days_diff)
|
||||
list: tile paths that overlap field
|
||||
pd.Timestamp: actual date of tiles found
|
||||
int: days difference from target to actual date found
|
||||
"""
|
||||
target_date = pd.Timestamp(target_date)
|
||||
tiff_dir = Path(tiff_dir)
|
||||
|
||||
available_tiffs = list(tiff_dir.glob('*.tif'))
|
||||
if not available_tiffs:
|
||||
return None, None
|
||||
if not tiff_dir.exists():
|
||||
return [], None, None
|
||||
|
||||
# Parse dates from filenames, skip obviously empty files by size
|
||||
tiff_dates = []
|
||||
min_size_mb = 12.0 # Empty files are ~11.56 MB, valid files are ~12.6-13.0 MB
|
||||
# Find all date subdirectories
|
||||
available_dates = {} # {date: ([tile file paths], actual_dir_name)}
|
||||
min_size_mb = 12.0 # Empty files are ~11.56 MB
|
||||
|
||||
for tiff_path in available_tiffs:
|
||||
for date_dir in tiff_dir.iterdir():
|
||||
if not date_dir.is_dir():
|
||||
continue
|
||||
|
||||
try:
|
||||
# Quick size check to skip obviously empty files
|
||||
file_size_mb = tiff_path.stat().st_size / (1024 * 1024)
|
||||
if file_size_mb < min_size_mb:
|
||||
# Parse date from directory name (YYYY-MM-DD or YYYY-MM-DD_HH)
|
||||
dir_name = date_dir.name
|
||||
# Extract just the date part before underscore if it exists
|
||||
date_str = dir_name.split('_')[0]
|
||||
tile_date = pd.Timestamp(date_str)
|
||||
days_diff = (tile_date - target_date).days
|
||||
|
||||
if abs(days_diff) > days_window:
|
||||
continue
|
||||
|
||||
date_str = tiff_path.stem # Remove .tif extension
|
||||
tiff_date = pd.Timestamp(date_str)
|
||||
days_diff = (tiff_date - target_date).days
|
||||
if abs(days_diff) <= days_window:
|
||||
tiff_dates.append((tiff_path, tiff_date, days_diff))
|
||||
# Find all .tif files in this date directory
|
||||
tile_files = []
|
||||
for tile_file in date_dir.glob('*.tif'):
|
||||
# Skip obviously empty files
|
||||
file_size_mb = tile_file.stat().st_size / (1024 * 1024)
|
||||
if file_size_mb >= min_size_mb:
|
||||
tile_files.append(tile_file)
|
||||
|
||||
if tile_files:
|
||||
available_dates[tile_date] = (tile_files, dir_name)
|
||||
except:
|
||||
pass
|
||||
|
||||
if not tiff_dates:
|
||||
return None, None
|
||||
if not available_dates:
|
||||
return [], None, None
|
||||
|
||||
# Return closest by distance
|
||||
closest = min(tiff_dates, key=lambda x: abs(x[2]))
|
||||
return closest[0], closest[2]
|
||||
# Find closest date
|
||||
closest_date = min(available_dates.keys(), key=lambda d: abs((d - target_date).days))
|
||||
days_diff = (closest_date - target_date).days
|
||||
tiles, _ = available_dates[closest_date]
|
||||
|
||||
# Filter tiles to only those that overlap field boundary
|
||||
if rasterio is None or field_boundary is None:
|
||||
# If rasterio not available, use all tiles (conservative approach)
|
||||
return tiles, closest_date, days_diff
|
||||
|
||||
overlapping_tiles = []
|
||||
|
||||
for tile_path in tiles:
|
||||
try:
|
||||
with rasterio.open(tile_path) as src:
|
||||
# Get tile bounds
|
||||
tile_bounds = src.bounds # (left, bottom, right, top)
|
||||
tile_geom = shgeom.box(*tile_bounds)
|
||||
|
||||
# Check if tile overlaps field
|
||||
if tile_geom.intersects(field_boundary):
|
||||
overlapping_tiles.append(tile_path)
|
||||
except:
|
||||
pass
|
||||
|
||||
if not overlapping_tiles:
|
||||
# No overlapping tiles found, return all tiles for the closest date
|
||||
return tiles, closest_date, days_diff
|
||||
|
||||
return overlapping_tiles, closest_date, days_diff
|
||||
|
||||
|
||||
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
|
||||
|
|
@ -148,12 +190,6 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
|
|||
- Band 4: NIR
|
||||
- Band 5: CI
|
||||
|
||||
For older merged_tif_8b files (raw Planet Scope 8-band):
|
||||
- Bands 1=Coastal Blue, 2=Blue, 3=Green, 4=Red, 5=Red Edge, 6=NIR, 7=SWIR1, 8=SWIR2
|
||||
- RGB would be bands 4,3,2 = Red, Green, Blue
|
||||
|
||||
This function auto-detects the format based on band count and descriptions.
|
||||
|
||||
Args:
|
||||
tiff_path (Path): Path to TIFF file
|
||||
field_boundary (shapely.Polygon): Field boundary for clipping
|
||||
|
|
@ -168,58 +204,106 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
|
|||
|
||||
try:
|
||||
with rasterio.open(tiff_path) as src:
|
||||
# Check CRS compatibility
|
||||
# Check band count
|
||||
if src.count < 3:
|
||||
print(f" ⚠ TIFF has only {src.count} bands (need at least 3 for RGB)")
|
||||
return None
|
||||
|
||||
# Auto-detect format based on band count and descriptions
|
||||
if src.count == 5 and hasattr(src, 'descriptions') and src.descriptions:
|
||||
# merged_final_tif format: Red, Green, Blue, NIR, CI
|
||||
bands_to_read = (1, 2, 3)
|
||||
elif src.count == 9:
|
||||
# merged_tif_8b format: use bands 4, 3, 2 for Red, Green, Blue
|
||||
bands_to_read = (4, 3, 2)
|
||||
else:
|
||||
# Default: try to use first 3 bands or specified bands
|
||||
bands_to_read = rgb_bands
|
||||
print(f" ℹ Unknown TIFF format ({src.count} bands), using bands {bands_to_read}")
|
||||
# For merged_final_tif: bands 1,2,3 are R,G,B
|
||||
bands_to_read = (1, 2, 3)
|
||||
|
||||
# Mask and read bands (crop=True reads only the clipped window, not full resolution)
|
||||
# Mask and read bands
|
||||
geom = shgeom.mapping(field_boundary)
|
||||
try:
|
||||
# Read RGB bands at once, then mask (more efficient than masking 3 times)
|
||||
masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
|
||||
|
||||
# masked_data shape is (3, height, width) - one layer per band
|
||||
rgb_data = []
|
||||
for i, band_idx in enumerate(bands_to_read):
|
||||
band_data = masked_data[i] # Extract the i-th band from masked stack
|
||||
|
||||
# Better debug output that handles NaN values
|
||||
valid_data = band_data[~np.isnan(band_data)]
|
||||
if len(valid_data) > 0:
|
||||
print(f" DEBUG: Band {band_idx} valid data range: {valid_data.min():.4f} - {valid_data.max():.4f} ({len(valid_data)} valid pixels)")
|
||||
else:
|
||||
print(f" DEBUG: Band {band_idx} - all NaN!")
|
||||
rgb_data.append(band_data)
|
||||
|
||||
# Stack RGB
|
||||
rgb = np.stack(rgb_data, axis=-1)
|
||||
rgb = np.stack([masked_data[i] for i in range(3)], axis=-1)
|
||||
|
||||
# Data is already normalized 0-1 from the merged_final_tif files
|
||||
# Just ensure it's float32 and clipped
|
||||
# Convert to float32 if not already
|
||||
rgb = rgb.astype(np.float32)
|
||||
|
||||
# Normalize to 0-1 range
|
||||
# Data appears to be 8-bit (0-255 range) stored as float32
|
||||
# Check actual max value to determine normalization
|
||||
max_val = np.nanmax(rgb)
|
||||
if max_val > 0:
|
||||
# If max is around 255 or less, assume 8-bit
|
||||
if max_val <= 255:
|
||||
rgb = rgb / 255.0
|
||||
# If max is around 65535, assume 16-bit
|
||||
elif max_val <= 65535:
|
||||
rgb = rgb / 65535.0
|
||||
# Otherwise divide by max to normalize
|
||||
else:
|
||||
rgb = rgb / max_val
|
||||
|
||||
rgb = np.clip(rgb, 0, 1)
|
||||
|
||||
print(f" DEBUG: Final RGB range: {rgb.min():.4f} - {rgb.max():.4f}")
|
||||
# Check if result is all NaN
|
||||
if np.all(np.isnan(rgb)):
|
||||
return None
|
||||
|
||||
# Replace any remaining NaN with 0 (cloud/invalid pixels)
|
||||
rgb = np.nan_to_num(rgb, nan=0.0)
|
||||
|
||||
return rgb
|
||||
except ValueError as e:
|
||||
print(f" ⚠ Error clipping to field boundary: {e}")
|
||||
except ValueError:
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f" ✗ Error loading TIFF {tiff_path.name}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def load_and_composite_tiles_rgb(tile_paths, field_boundary):
|
||||
"""
|
||||
Load RGB from multiple overlapping tiles and composite them into a single image.
|
||||
|
||||
Args:
|
||||
tile_paths (list[Path]): List of tile file paths
|
||||
field_boundary (shapely.Polygon): Field boundary for clipping
|
||||
|
||||
Returns:
|
||||
np.ndarray: Composited RGB data (height, width, 3) with values 0-1
|
||||
or None if error occurs
|
||||
"""
|
||||
if rasterio is None or field_boundary is None or not tile_paths:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Load and composite all tiles
|
||||
rgb_arrays = []
|
||||
|
||||
for tile_path in tile_paths:
|
||||
rgb = load_and_clip_tiff_rgb(tile_path, field_boundary)
|
||||
if rgb is not None:
|
||||
rgb_arrays.append(rgb)
|
||||
|
||||
if not rgb_arrays:
|
||||
return None
|
||||
|
||||
# If single tile, return it
|
||||
if len(rgb_arrays) == 1:
|
||||
composited = rgb_arrays[0]
|
||||
else:
|
||||
# If multiple tiles, use max composite
|
||||
stacked = np.stack(rgb_arrays, axis=0)
|
||||
composited = np.max(stacked, axis=0)
|
||||
|
||||
composited = composited.astype(np.float32)
|
||||
|
||||
# Stretch contrast: normalize to 0-1 range based on actual min/max in the data
|
||||
# This makes dim images visible
|
||||
valid_data = composited[composited > 0]
|
||||
if len(valid_data) > 0:
|
||||
data_min = np.percentile(valid_data, 2) # 2nd percentile to handle outliers
|
||||
data_max = np.percentile(valid_data, 98) # 98th percentile
|
||||
|
||||
if data_max > data_min:
|
||||
composited = (composited - data_min) / (data_max - data_min)
|
||||
composited = np.clip(composited, 0, 1)
|
||||
|
||||
return composited.astype(np.float32)
|
||||
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -276,27 +360,24 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field
|
|||
actual_dates = [] # Store actual dates of TIFFs found
|
||||
|
||||
for target in target_dates:
|
||||
tiff_path, days_diff = find_closest_tiff(target, tiff_dir, days_window=60, field_boundary=field_boundary)
|
||||
tile_paths, actual_date, days_diff = find_overlapping_tiles(target, tiff_dir, field_boundary, days_window=60)
|
||||
|
||||
if tiff_path is None:
|
||||
if not tile_paths or actual_date is None:
|
||||
rgb_images.append(None)
|
||||
days_offsets.append(None)
|
||||
actual_dates.append(None)
|
||||
print(f" ⚠ No TIFF found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data")
|
||||
print(f" ⚠ No tiles found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data")
|
||||
continue
|
||||
|
||||
# Extract date from filename: YYYY-MM-DD.tif
|
||||
tiff_date = pd.to_datetime(tiff_path.stem)
|
||||
|
||||
rgb = load_and_clip_tiff_rgb(tiff_path, field_boundary)
|
||||
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
|
||||
rgb_images.append(rgb)
|
||||
days_offsets.append(days_diff)
|
||||
actual_dates.append(tiff_date)
|
||||
actual_dates.append(actual_date)
|
||||
|
||||
if rgb is not None:
|
||||
print(f" ✓ Loaded {tiff_path.name} ({days_diff:+d}d from target)")
|
||||
print(f" ✓ Loaded {len(tile_paths)} tile(s) for {actual_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from target)")
|
||||
else:
|
||||
print(f" ⚠ Loaded {tiff_path.name} but RGB data is None")
|
||||
print(f" ⚠ Loaded {len(tile_paths)} tile(s) but RGB data is None")
|
||||
|
||||
# Create 5x3 grid plot (15 images)
|
||||
fig, axes = plt.subplots(3, 5, figsize=(25, 15))
|
||||
|
|
@ -360,11 +441,16 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field
|
|||
filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest_rgb.png'
|
||||
|
||||
output_path = Path(output_dir) / filename
|
||||
plt.savefig(output_path, dpi=100, bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
print(f" ✓ Saved: {filename}")
|
||||
return output_path
|
||||
try:
|
||||
plt.savefig(output_path, dpi=100, format='png')
|
||||
plt.close()
|
||||
|
||||
print(f" ✓ Saved: {filename}")
|
||||
return output_path
|
||||
except Exception as e:
|
||||
plt.close()
|
||||
print(f" ✗ Error saving PNG: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates,
|
||||
|
|
@ -11,11 +11,28 @@ library(terra)
|
|||
library(sf)
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION
|
||||
# CONFIGURATION & COMMAND-LINE ARGUMENTS
|
||||
# ============================================================================
|
||||
|
||||
# Parse command-line arguments for date filtering
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
|
||||
start_date <- NULL
|
||||
end_date <- NULL
|
||||
|
||||
if (length(args) >= 1) {
|
||||
start_date <- as.Date(args[1])
|
||||
cat("Filtering: start date =", as.character(start_date), "\n")
|
||||
}
|
||||
|
||||
if (length(args) >= 2) {
|
||||
end_date <- as.Date(args[2])
|
||||
cat("Filtering: end date =", as.character(end_date), "\n")
|
||||
}
|
||||
|
||||
PROJECT <- "angata"
|
||||
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
|
||||
TIFF_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
|
||||
|
||||
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
|
||||
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
|
||||
|
|
@ -25,10 +42,10 @@ GRID_NCOLS <- 5
|
|||
|
||||
# Construct grid-specific subfolder path
|
||||
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
|
||||
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
|
||||
OUTPUT_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
|
||||
|
||||
# Load field boundaries for overlap checking
|
||||
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
|
||||
GEOJSON_PATH <- file.path("..", "laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
|
||||
|
||||
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
|
||||
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
||||
|
|
@ -40,31 +57,50 @@ cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
|||
cat("\n[PART 1] Creating Master Grid\n")
|
||||
|
||||
# Load field boundaries for overlap checking
|
||||
cat("\n[1] Loading field boundaries from GeoJSON...\n")
|
||||
cat("\n[1] Checking for existing master grid...\n")
|
||||
|
||||
if (!file.exists(GEOJSON_PATH)) {
|
||||
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
|
||||
"Please ensure ", PROJECT, " has a pivot.geojson file.")
|
||||
}
|
||||
# Check if master grid already exists
|
||||
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
|
||||
|
||||
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
|
||||
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
|
||||
|
||||
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "")
|
||||
|
||||
# Try to find a name column (could be 'name', 'field', 'field_name', etc.)
|
||||
field_names <- NA
|
||||
if ("name" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$name
|
||||
} else if ("field" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$field
|
||||
} else if ("field_name" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$field_name
|
||||
if (file.exists(MASTER_GRID_PATH)) {
|
||||
cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
|
||||
master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
|
||||
field_boundaries_sf <- NULL # No need to load pivot.geojson
|
||||
field_boundaries_vect <- NULL
|
||||
|
||||
cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
|
||||
|
||||
} else {
|
||||
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
|
||||
# No existing grid - need to create one from pivot.geojson
|
||||
cat(" No existing grid found. Creating new one from pivot.geojson...\n")
|
||||
|
||||
if (!file.exists(GEOJSON_PATH)) {
|
||||
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
|
||||
"Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
|
||||
"from the same directory as a previous successful run (grid already exists).")
|
||||
}
|
||||
|
||||
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
|
||||
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
|
||||
|
||||
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
|
||||
}
|
||||
|
||||
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
|
||||
# Try to find a name column (only if field_boundaries_sf exists)
|
||||
if (!is.null(field_boundaries_sf)) {
|
||||
field_names <- NA
|
||||
if ("name" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$name
|
||||
} else if ("field" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$field
|
||||
} else if ("field_name" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$field_name
|
||||
} else {
|
||||
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
|
||||
}
|
||||
|
||||
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
|
||||
}
|
||||
|
||||
# Helper function: Check if a tile overlaps with any field (simple bbox overlap)
|
||||
tile_overlaps_fields <- function(tile_extent, field_geoms) {
|
||||
|
|
@ -105,6 +141,27 @@ cat("\n[2] Checking TIFF extents...\n")
|
|||
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
|
||||
tiff_files <- sort(tiff_files)
|
||||
|
||||
# Filter by date range if specified
|
||||
if (!is.null(start_date) || !is.null(end_date)) {
|
||||
cat("\nApplying date filter...\n")
|
||||
|
||||
file_dates <- as.Date(sub("\\.tif$", "", tiff_files))
|
||||
|
||||
if (!is.null(start_date) && !is.null(end_date)) {
|
||||
keep_idx <- file_dates >= start_date & file_dates <= end_date
|
||||
cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
|
||||
} else if (!is.null(start_date)) {
|
||||
keep_idx <- file_dates >= start_date
|
||||
cat(" From: ", as.character(start_date), "\n", sep = "")
|
||||
} else {
|
||||
keep_idx <- file_dates <= end_date
|
||||
cat(" Until: ", as.character(end_date), "\n", sep = "")
|
||||
}
|
||||
|
||||
tiff_files <- tiff_files[keep_idx]
|
||||
cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
|
||||
}
|
||||
|
||||
if (length(tiff_files) == 0) {
|
||||
stop("No TIFF files found in ", TIFF_FOLDER)
|
||||
}
|
||||
|
|
@ -242,26 +299,33 @@ if (file.exists(master_grid_file)) {
|
|||
|
||||
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
|
||||
|
||||
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
|
||||
|
||||
# Check which tiles overlap with any field
|
||||
overlapping_tile_indices <- c()
|
||||
for (tile_idx in 1:nrow(master_grid_sf)) {
|
||||
tile_geom <- master_grid_sf[tile_idx, ]
|
||||
# If grid was loaded from file, it's already filtered. Skip filtering.
|
||||
if (!file.exists(MASTER_GRID_PATH)) {
|
||||
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
|
||||
|
||||
# Check overlap with any field
|
||||
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
|
||||
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
|
||||
# Check which tiles overlap with any field
|
||||
overlapping_tile_indices <- c()
|
||||
for (tile_idx in 1:nrow(master_grid_sf)) {
|
||||
tile_geom <- master_grid_sf[tile_idx, ]
|
||||
|
||||
# Check overlap with any field
|
||||
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
|
||||
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
|
||||
}
|
||||
}
|
||||
|
||||
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
|
||||
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
|
||||
|
||||
# Create filtered grid with only overlapping tiles
|
||||
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
|
||||
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
|
||||
} else {
|
||||
cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
|
||||
# Grid was already loaded - it's already filtered
|
||||
filtered_grid_sf <- master_grid_sf
|
||||
}
|
||||
|
||||
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
|
||||
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
|
||||
|
||||
# Create filtered grid with only overlapping tiles
|
||||
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
|
||||
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
|
||||
|
||||
# Convert to SpatVector for makeTiles
|
||||
filtered_grid_vect <- terra::vect(filtered_grid_sf)
|
||||
|
||||
|
|
@ -314,7 +378,7 @@ for (file_idx in seq_along(tiff_files)) {
|
|||
dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
|
||||
cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "")
|
||||
cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")
|
||||
|
||||
# Use makeTiles with FILTERED grid (only overlapping tiles)
|
||||
tiles_list <- terra::makeTiles(
|
||||
|
|
|
|||
|
|
@ -54,14 +54,12 @@ main <- function() {
|
|||
if (is.na(end_date)) {
|
||||
message("Invalid end_date provided. Using current date.")
|
||||
end_date <- Sys.Date()
|
||||
end_date <- "2026-01-01" # Default date for testing
|
||||
}
|
||||
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
||||
end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
|
||||
} else {
|
||||
# Default to current date if no argument is provided
|
||||
end_date <- Sys.Date()
|
||||
end_date <- "2026-01-01" # Default date for testing
|
||||
message("No end_date provided. Using current date: ", format(end_date))
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue