updated code to fit in workflow better

This commit is contained in:
Timon 2026-01-12 16:33:23 +01:00
parent d1303dd218
commit cb63cf00b8
11 changed files with 2602 additions and 69 deletions

View file

@ -31,7 +31,7 @@ Examples:
python download_8band_pu_optimized.py chemba # Uses today's date
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
python download_8band_pu_optimized.py angata --clear-all --resolution 5
Cost Model:
- 4-band uint16 with cloud masking: ~50% lower cost than 9-band FLOAT32
- Reduced bbox sizes: ~10-20% lower cost due to smaller average tile size
@ -39,6 +39,18 @@ Cost Model:
- Requests: Slightly higher (~50-60 tiles) but within 700k budget
Expected result: ~75% PU savings with dynamic geometry-fitted grid
Example running it in powershell:
$startDate = [DateTime]::ParseExact("2025-11-01", "yyyy-MM-dd", $null)
$endDate = [DateTime]::ParseExact("2025-12-24", "yyyy-MM-dd", $null)
$current = $startDate
while ($current -le $endDate) {
$dateStr = $current.ToString("yyyy-MM-dd")
Write-Host "Downloading $dateStr..."
python download_8band_pu_optimized.py angata --date $dateStr
$current = $current.AddDays(1)
}
"""
import os

View file

@ -0,0 +1,111 @@
"""
Script: 01_harvest_baseline_prediction.py
Purpose: BASELINE PREDICTION - Run ONCE to establish harvest date baseline for all fields and seasons
This script processes COMPLETE historical CI data (all available dates) and uses Model 307
to predict ALL harvest dates across the entire dataset. This becomes your reference baseline
for monitoring and comparisons going forward.
RUN FREQUENCY: Once during initial setup
INPUT: ci_data_for_python.csv (complete historical CI data from 02b_convert_rds_to_csv.R)
Location: laravel_app/storage/app/{project}/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv
OUTPUT: harvest_production_export.xlsx (baseline harvest predictions for all fields/seasons)
Workflow:
1. Load ci_data_for_python.csv (daily interpolated, all historical dates)
2. Group data by field and season (Model 307 detects season boundaries internally)
3. Run two-step harvest detection (Phase 1: fast detection, Phase 2: ±40 day refinement)
4. Export harvest_production_export.xlsx with columns:
- field, sub_field, season, year, season_start_date, season_end_date, phase1_harvest_date
Two-Step Detection Algorithm:
Phase 1 (Growing Window): Expands daily, checks when detected_prob > 0.5 for 3 consecutive days
Phase 2 (Refinement): Extracts ±40 day window, finds peak harvest signal with argmax
This is your GROUND TRUTH - compare all future predictions against this baseline.
Usage:
python 01_harvest_baseline_prediction.py [project_name]
Examples:
python 01_harvest_baseline_prediction.py angata
python 01_harvest_baseline_prediction.py esa
python 01_harvest_baseline_prediction.py chemba
If no project specified, defaults to 'angata'
"""
import pandas as pd
import numpy as np
import torch
import sys
from pathlib import Path
from harvest_date_pred_utils import (
load_model_and_config,
extract_features,
run_two_step_refinement,
build_production_harvest_table
)
def main():
    """Establish the harvest-date baseline for one project (run once at setup).

    Reads the complete interpolated CI history for the project, runs the
    two-step Model 307 harvest detection over every field/season, and writes
    the baseline table to harvest_production_export.xlsx under the project's
    HarvestData folder.
    """
    # Project selection: first CLI argument, defaulting to 'angata'.
    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"

    # Resolve the Laravel storage layout for this project.
    storage_root = Path("../laravel_app/storage/app") / project_name / "Data"
    input_csv = storage_root / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
    output_dir = storage_root / "HarvestData"
    output_dir.mkdir(parents=True, exist_ok=True)  # create HarvestData if missing
    output_xlsx = output_dir / "harvest_production_export.xlsx"
    model_dir = Path(".")  # Model files in python_app/

    # Guard: the R conversion step must have produced the CI CSV first.
    if not input_csv.exists():
        print(f"ERROR: {input_csv} not found")
        print(f" Expected at: {input_csv.resolve()}")
        print(f"\n Run 02b_convert_rds_to_csv.R first to generate this file:")
        print(f" Rscript r_app/02b_convert_ci_rds_to_csv.R {project_name}")
        return

    print("="*80)
    print(f"HARVEST DATE PREDICTION - LSTM MODEL 307 ({project_name})")
    print("="*80)

    # [1/4] Load model artifacts (network weights, feature config, scalers).
    print("\n[1/4] Loading Model 307...")
    model, config, scalers = load_model_and_config(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f" Device: {device}")

    # [2/4] Load the daily-interpolated CI data and parse dates.
    print("\n[2/4] Loading CI data...")
    print(f" From: {input_csv}")
    ci_data = pd.read_csv(input_csv)
    ci_data['Date'] = pd.to_datetime(ci_data['Date'])
    print(f" Loaded {len(ci_data)} daily rows across {ci_data['field'].nunique()} fields")
    print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}")

    # [3/4] Two-step detection: growing window (Phase 1) + refinement (Phase 2).
    print("\n[3/4] Running two-step harvest detection...")
    refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device)

    # [4/4] Flatten results into the export table and write it out.
    print("\nBuilding production harvest table...")
    prod_table = build_production_harvest_table(refined_results)
    prod_table.to_excel(output_xlsx, index=False)

    print(f"\n✓ Exported {len(prod_table)} predictions to {output_xlsx}")
    print(f"\nOutput location: {output_xlsx.resolve()}")
    print(f"\nStorage structure:")
    print(f" Input: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
    print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
    print(f"\nColumn structure:")
    print(f" field, sub_field, season, year, season_start_date, season_end_date, phase1_harvest_date")
    print(f"\nNext steps:")
    print(f" 1. Review baseline predictions in harvest_production_export.xlsx")
    print(f" 2. Run weekly monitoring: python 02_harvest_imminent_weekly.py {project_name}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,348 @@
"""
Script: 02_harvest_imminent_weekly.py
Purpose: WEEKLY MONITORING - Run WEEKLY/DAILY to get real-time harvest status for all fields
This script runs on RECENT CI data (typically last 300 days) to predict whether each field
is approaching harvest. Use this for operational decision-making and real-time alerts.
RUN FREQUENCY: Weekly (or daily if required)
INPUT:
- ci_data_for_python.csv (recent CI data from 02b_convert_rds_to_csv.R)
Location: laravel_app/storage/app/{project}/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv
- harvest_production_export.xlsx (baseline from script 01 - optional, for reference)
OUTPUT:
- harvest_imminent_weekly.csv (weekly probabilities: field, imminent_prob, detected_prob, week, year)
Workflow:
1. Load harvest_production_export.xlsx (baseline dates - optional, for context)
2. Load ci_data_for_python.csv (recent CI data)
3. For each field, extract last 300 days of history
4. Run Model 307 inference on full sequence (last timestep probabilities)
5. Export harvest_imminent_weekly.csv with probabilities
Output Columns:
- field: Field ID
- sub_field: Sub-field identifier
- imminent_prob: Probability field will be harvestable in next 28 days (0.0-1.0)
- detected_prob: Probability field is currently being harvested (0.0-1.0)
- week: ISO week number
- year: Year
- as_of_date: Latest date in dataset
- num_days: Number of days of history used
Use Cases:
- Alert when imminent_prob > 0.7 (prepare harvest operations)
- Alert when detected_prob > 0.6 (field is being harvested)
- Track trends over weeks to validate baseline predictions
- Feed into 09b script for weekly dashboard reports
Usage:
python 02_harvest_imminent_weekly.py [project_name]
Examples:
python 02_harvest_imminent_weekly.py angata
python 02_harvest_imminent_weekly.py esa
python 02_harvest_imminent_weekly.py chemba
If no project specified, defaults to 'angata'
"""
import pandas as pd
import numpy as np
import torch
import subprocess
import sys
from pathlib import Path
from datetime import datetime, timedelta
from harvest_date_pred_utils import (
load_model_and_config,
extract_features,
)
def load_harvest_dates(harvest_file):
    """Load latest harvest end dates from Excel file (from harvest_production_export.xlsx).

    Returns a dict mapping stripped field id (str) -> latest season_end_date
    Timestamp, or None when the file is missing or unreadable (callers then
    fall back to a fixed lookback window).
    """
    print("[1/5] Loading harvest dates...")
    if not Path(harvest_file).exists():
        print(f" ERROR: {harvest_file} not found")
        print(" Using 180-day lookback as default")
        return None
    try:
        baseline = pd.read_excel(harvest_file)
        print(f" Loaded {len(baseline)} field-season records")
        # Use season_end_date column (output from harvest prediction script)
        baseline['season_end_date'] = pd.to_datetime(baseline['season_end_date'])
        # Keep only the most recent season end per field.
        harvest_dates = {
            str(field_id).strip(): seasons['season_end_date'].max()
            for field_id, seasons in baseline.groupby('field')
        }
        print(f" Successfully mapped {len(harvest_dates)} fields")
        print(f" Harvest end dates range: {min(harvest_dates.values()).date()} to {max(harvest_dates.values()).date()}")
        return harvest_dates
    except Exception as e:
        print(f" ERROR loading harvest file: {e}")
        print(f" Using 180-day lookback instead")
        return None
def run_rds_to_csv_conversion():
    """Convert the RDS CI archive to CSV by invoking the R conversion script.

    Looks for 02b_convert_rds_to_csv.R in the current directory and runs it
    with Rscript. The known Windows install location is preferred for backward
    compatibility, with a fallback to any `Rscript` found on PATH so the step
    also works on Linux/macOS or with a different R version.

    Returns:
        bool: True if the R script ran and exited with status 0, else False.
    """
    import shutil  # local import: only needed here to locate Rscript on PATH

    print("\n[2/5] Converting RDS to CSV (daily interpolation)...")
    r_script = Path("02b_convert_rds_to_csv.R")
    if not r_script.exists():
        print(f" ERROR: {r_script} not found")
        return False
    # Previously hard-coded to the Windows R 4.4.3 path, which broke on any
    # other machine. Keep it as first choice, then fall back to PATH lookup.
    rscript_exe = r"C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe"
    if not Path(rscript_exe).exists():
        located = shutil.which("Rscript")
        if located is None:
            print(" ERROR: Rscript executable not found (install R or add it to PATH)")
            return False
        rscript_exe = located
    try:
        # List form (shell=False) avoids quoting issues in the script path.
        result = subprocess.run(
            [rscript_exe, str(r_script)],
            capture_output=True,
            text=True,
            timeout=300  # abort if the conversion hangs (5 minutes)
        )
        if result.returncode != 0:
            print(f" ERROR running R script:\n{result.stderr}")
            return False
        # Echo the tail of the R output so progress is visible in our log.
        lines = result.stdout.strip().split('\n')
        for line in lines[-5:]:
            if line.strip():
                print(f" {line}")
        return True
    except Exception as e:
        print(f" ERROR: {e}")
        return False
def load_ci_data(csv_file):
    """Read the daily-interpolated CI CSV and parse its Date column.

    Returns the DataFrame, or None when the file does not exist.
    """
    print("\n[3/5] Loading CI data...")
    if not Path(csv_file).exists():
        print(f" ERROR: {csv_file} not found")
        return None
    frame = pd.read_csv(csv_file)
    frame['Date'] = pd.to_datetime(frame['Date'])
    print(f" Loaded {len(frame)} daily rows for {frame['field'].nunique()} fields")
    print(f" Date range: {frame['Date'].min().date()} to {frame['Date'].max().date()}")
    return frame
def extract_seasonal_data(field_id, harvest_date, ci_data):
    """Return one field's CI rows from `harvest_date` onward, sorted by date.

    Returns None when the field is absent from `ci_data` or when fewer than
    30 rows remain after the date filter (too little history for inference).
    Note: `field_id` and the 'field' column are both ints, so equality works.
    """
    subset = ci_data.loc[ci_data['field'] == field_id].copy()
    if subset.empty:
        return None
    # Restrict to the current season: everything on/after the harvest date.
    subset = subset.loc[subset['Date'] >= harvest_date].sort_values('Date')
    return subset if len(subset) >= 30 else None
def run_inference_on_season(field_data, model, config, scalers, device, ci_column='FitData'):
    """
    Run Model 307 inference on recent field CI history.

    Predicts probability that field will be ready to harvest in next 28 days.
    Uses the last timestep of the provided sequence as the current-day score.

    Args:
        field_data: single-field daily CI rows (sorted by date by the caller).
        model: trained dual-head network; forward returns (imminent, detected).
        config: model config dict; config['features'] lists feature names.
        scalers: list of per-feature StandardScaler objects (or falsy to skip).
        device: torch device for inference.
        ci_column: name of the CI value column to featurize.

    Returns:
        (imminent_prob, detected_prob) rounded to 4 decimals, or (None, None)
        when there is too little data or any step fails (best-effort design:
        the caller simply skips the field).
    """
    try:
        # Use last 300 days of data for inference (enough history for
        # meaningful patterns, avoids training data seasonality mismatch).
        if len(field_data) > 300:
            field_data = field_data.iloc[-300:]
        # Build the rolling-window feature matrix (timesteps x features).
        features_array = extract_features(field_data, config['features'], ci_column)
        if features_array.shape[0] < 10:
            # Too few timesteps for a meaningful sequence score.
            return None, None
        # Scale features using per-feature scalers (CRITICAL: same as Phase 1
        # in harvest_date_pred_utils.py). `scalers` is a list of StandardScaler
        # objects, one per feature, in training feature order.
        if scalers and isinstance(scalers, list):
            for fi, scaler in enumerate(scalers):
                try:
                    features_array[:, fi] = scaler.transform(features_array[:, fi].reshape(-1, 1)).flatten()
                except Exception:
                    pass  # best-effort: leave this feature unscaled on mismatch
        # Run inference on the whole sequence in one forward pass.
        with torch.no_grad():
            x_tensor = torch.tensor(features_array, dtype=torch.float32).unsqueeze(0).to(device)
            out_imm, out_det = model(x_tensor)
            # Get last timestep probabilities (the "as of today" estimate).
            imminent_prob = out_imm.squeeze(0)[-1].cpu().item()
            detected_prob = out_det.squeeze(0)[-1].cpu().item()
        return round(imminent_prob, 4), round(detected_prob, 4)
    except Exception as e:
        # Deliberate catch-all: a single bad field must not abort the batch.
        return None, None
def main():
    """Weekly monitoring entry point: score every field's harvest probabilities.

    Loads recent CI data for the selected project, runs Model 307 on the last
    300 days of each field's history, and writes per-field imminent/detected
    probabilities to harvest_imminent_weekly.csv.
    """
    # Get project name from command line or use default
    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
    # Construct paths under the Laravel storage tree for this project
    base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
    ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
    CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
    harvest_data_dir = base_storage / "HarvestData"
    BASELINE_FILE = harvest_data_dir / "harvest_production_export.xlsx"
    OUTPUT_CSV = harvest_data_dir / "harvest_imminent_weekly.csv"
    harvest_data_dir.mkdir(parents=True, exist_ok=True)  # Create if doesn't exist
    print("="*80)
    print(f"HARVEST IMMINENT PROBABILITY - WEEKLY MONITORING ({project_name})")
    print("="*80)
    # [1] Load harvest dates (optional - for projects with predictions).
    # NOTE(review): `harvest_dates` is loaded but not used further below in
    # this function — kept for context/logging only; confirm before removing.
    harvest_dates = None
    if BASELINE_FILE.exists():
        harvest_dates = load_harvest_dates(BASELINE_FILE)
    else:
        print("[1/5] Loading harvest dates...")
        print(f" INFO: {BASELINE_FILE} not found (optional for weekly monitoring)")
    # [2] Load CI data
    print(f"\n[2/5] Loading CI data...")
    print(f" From: {CI_DATA_FILE}")
    if not CI_DATA_FILE.exists():
        print(f" ERROR: {CI_DATA_FILE} not found")
        print(f" Expected at: {CI_DATA_FILE.resolve()}")
        print(f"\n Run 02b_convert_rds_to_csv.R first to generate this file:")
        print(f" Rscript r_app/02b_convert_ci_rds_to_csv.R {project_name}")
        return
    ci_data = load_ci_data(CI_DATA_FILE)
    if ci_data is None:
        print("ERROR: Could not load CI data")
        return
    # [3] Load model (from python_app directory)
    print("\n[3/5] Loading Model 307...")
    model_dir = Path(".")  # Current directory is python_app/, contains model.pt, config.json, scalers.pkl
    model, config, scalers = load_model_and_config(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f" Device: {device}")
    # [4] Run inference per field
    print("\n[4/5] Running seasonal inference...")
    results_list = []
    ci_column = config['data']['ci_column']
    # Get field metadata: sub_field label and most recent observation date
    field_meta = ci_data.groupby('field').agg({
        'sub_field': 'first',
        'Date': 'max'
    }).reset_index()
    field_meta.columns = ['field', 'sub_field', 'latest_date']
    count = 0
    for field_id in ci_data['field'].unique():
        # Get metadata
        meta = field_meta[field_meta['field'] == field_id]
        if len(meta) == 0:
            continue
        sub_field = meta['sub_field'].iloc[0]
        latest_date = meta['latest_date'].iloc[0]
        # Use recent CI history (last 300 days from latest available data)
        field_data = ci_data[ci_data['field'] == field_id].copy()
        field_data = field_data.sort_values('Date')
        # Keep last 300 days of history for inference
        if len(field_data) > 300:
            field_data = field_data.iloc[-300:]
        if len(field_data) < 30:
            continue  # too little history for meaningful inference
        # Run inference on recent history to predict next 28 days
        imminent_prob, detected_prob = run_inference_on_season(
            field_data, model, config, scalers, device, ci_column
        )
        if imminent_prob is None:
            continue  # inference failed or insufficient features; skip field
        # %V = ISO week number of the latest observation date
        week = int(latest_date.strftime('%V'))
        year = int(latest_date.strftime('%Y'))
        results_list.append({
            'field': field_id,
            'sub_field': sub_field,
            'imminent_prob': imminent_prob,
            'detected_prob': detected_prob,
            'week': week,
            'year': year,
            'as_of_date': latest_date,
            'num_days': len(field_data),
        })
        count += 1
    print(f" Completed inference for {count} fields")
    # Build output DataFrame and write it (empty frame still produces a CSV)
    df = pd.DataFrame(results_list)
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"\n[5/5] Exporting results...")
    print(f"✓ Exported {len(df)} fields to {OUTPUT_CSV}")
    print(f" Output location: {OUTPUT_CSV.resolve()}")
    if len(df) > 0:
        print(f"\nSample rows:")
        print(df[['field', 'sub_field', 'imminent_prob', 'detected_prob', 'num_days', 'week', 'year']].head(15).to_string(index=False))
        # Show alert summary using the documented operational thresholds
        high_imminent = len(df[df['imminent_prob'] > 0.7])
        high_detected = len(df[df['detected_prob'] > 0.6])
        print(f"\n⚠ ALERTS:")
        print(f" Fields with imminent_prob > 0.70: {high_imminent}")
        print(f" Fields with detected_prob > 0.60: {high_detected}")
    else:
        print(f" WARNING: No results exported - check CI data availability")
    print(f"\nStorage structure:")
    print(f" Input CI: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
    print(f" Input baseline: laravel_app/storage/app/{project_name}/Data/HarvestData/harvest_production_export.xlsx")
    print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
    print(f"\nReady to load into 09b field analysis report")


if __name__ == "__main__":
    main()

View file

@ -18,6 +18,7 @@ import sys
import json
import datetime
import argparse
import subprocess
from pathlib import Path
from osgeo import gdal
import time
@ -441,6 +442,7 @@ def get_evalscript():
def main():
print("="*80)
print("PLANET SATELLITE DATA DOWNLOADER - MISSING DATES ONLY")
print("Wrapper for 00_download_8band_pu_optimized.py")
print("="*80)
config_dict = get_config()
@ -495,47 +497,45 @@ def main():
print(f" - {date}")
if config_dict['dry_run']:
print("\n[DRY-RUN] Would download and merge above dates")
print("\n[DRY-RUN] Would download above dates using 00_download_8band_pu_optimized.py")
return 0
# Setup BBox list
print(f"\nLoading field geometries...")
bbox_list = setup_bbox_list(paths['geojson'], resolution=config_dict['resolution'])
if bbox_list is None:
return 1
print(f" Created {len(bbox_list)} BBox tiles")
# Download and merge each missing date
print(f"\nDownloading missing dates...")
# Download each missing date using the optimized downloader
print(f"\n{'='*80}")
print(f"Downloading missing dates using optimized script...")
print(f"{'='*80}")
success_count = 0
for i, slot in enumerate(missing_dates, 1):
print(f"\n[{i}/{len(missing_dates)}] Processing {slot}...")
for i, date_str in enumerate(missing_dates, 1):
print(f"\n[{i}/{len(missing_dates)}] Downloading {date_str}...")
# Check availability
if not is_image_available(slot, bbox_list, collection_id):
print(f" Skipping {slot} - no imagery available")
continue
# Call 00_download_8band_pu_optimized.py for this date
cmd = [
sys.executable,
"00_download_8band_pu_optimized.py",
config_dict['project'],
"--date", date_str,
"--resolution", str(config_dict['resolution']),
"--cleanup"
]
# Download for all bboxes
print(f" Downloading {len(bbox_list)} tiles...")
for bbox in bbox_list:
size = bbox_to_dimensions(bbox, resolution=config_dict['resolution'])
download_function(slot, bbox, size, paths['single_images'])
# Merge
print(f" Merging tiles...")
if merge_files(slot, paths['single_images'], paths['merged_tifs'], paths['virtual_raster']):
try:
result = subprocess.run(cmd, check=True, capture_output=False)
success_count += 1
print(f" ✓ Successfully downloaded {date_str}")
except subprocess.CalledProcessError as e:
print(f" ✗ Failed to download {date_str}: {e}")
# Continue with next date instead of stopping
continue
# Summary
print(f"\n{'='*80}")
print(f"SUMMARY:")
print(f" Successfully processed: {success_count}/{len(missing_dates)} dates")
print(f" Output folder: {paths['merged_tifs']}")
print(f"{'='*80}")
return 0
return 0 if success_count == len(missing_dates) else 1
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,482 @@
"""
Self-contained utility module for two-step harvest date prediction and Excel export.
Includes model architecture, feature engineering, and core prediction logic.
"""
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import pickle
import yaml
from pathlib import Path
from typing import Tuple, Dict, Any, List
from sklearn.preprocessing import StandardScaler
# ============================================================================
# TORCH MODELS (from src/models.py, inlined for self-containment)
# ============================================================================
class HarvestDetectionLSTM(nn.Module):
    """Unidirectional LSTM for harvest detection with dual outputs.

    Emits two per-timestep probabilities via small sigmoid heads:
    'imminent' (harvest expected soon) and 'detected' (harvest underway).
    """

    def __init__(self, input_size: int, hidden_size: int = 128,
                 num_layers: int = 1, dropout: float = 0.5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout only applies when the LSTM is stacked.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=False,
            batch_first=True,
        )

        def make_head() -> nn.Sequential:
            # Small MLP mapping each hidden state to a single probability.
            return nn.Sequential(
                nn.Linear(hidden_size, 16),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(16, 1),
                nn.Sigmoid(),
            )

        self.imminent_head = make_head()
        self.detected_head = make_head()

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (imminent, detected) probabilities, each shaped (batch, seq_len)."""
        hidden_states, _ = self.lstm(x)
        batch, steps, width = hidden_states.shape
        # Flatten timesteps so both heads run as one big linear pass.
        flat = hidden_states.reshape(-1, width)
        imminent = self.imminent_head(flat).reshape(batch, steps)
        detected = self.detected_head(flat).reshape(batch, steps)
        return imminent, detected
class HarvestDetectionGRU(nn.Module):
    """Unidirectional GRU for harvest detection with dual outputs.

    Same interface as HarvestDetectionLSTM, with a GRU recurrent core:
    per-timestep 'imminent' and 'detected' probabilities from sigmoid heads.
    """

    def __init__(self, input_size: int, hidden_size: int = 128,
                 num_layers: int = 1, dropout: float = 0.5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout only applies when the GRU is stacked.
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=False,
            batch_first=True,
        )

        def make_head() -> nn.Sequential:
            # Small MLP mapping each hidden state to a single probability.
            return nn.Sequential(
                nn.Linear(hidden_size, 16),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(16, 1),
                nn.Sigmoid(),
            )

        self.imminent_head = make_head()
        self.detected_head = make_head()

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (imminent, detected) probabilities, each shaped (batch, seq_len)."""
        hidden_states, _ = self.gru(x)
        batch, steps, width = hidden_states.shape
        # Flatten timesteps so both heads run as one big linear pass.
        flat = hidden_states.reshape(-1, width)
        imminent = self.imminent_head(flat).reshape(batch, steps)
        detected = self.detected_head(flat).reshape(batch, steps)
        return imminent, detected
def create_model(model_type: str, input_size: int, hidden_size: int = 128,
                 num_layers: int = 1, dropout: float = 0.5, device = None) -> nn.Module:
    """Build a harvest-detection network by type name and print a summary.

    Args:
        model_type: 'LSTM' or 'GRU' (registry lookup).
        input_size: number of input features per timestep.
        hidden_size / num_layers / dropout: recurrent-core hyperparameters.
        device: optional torch device to move the model onto.

    Raises:
        ValueError: when `model_type` is not in the registry.
    """
    registry = {'LSTM': HarvestDetectionLSTM, 'GRU': HarvestDetectionGRU}
    if model_type not in registry:
        raise ValueError(f"Unknown model type: {model_type}")
    network = registry[model_type](
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout,
    )
    if device:
        network = network.to(device)
    # Parameter counts for the console summary below.
    total_params = sum(p.numel() for p in network.parameters())
    trainable_params = sum(p.numel() for p in network.parameters() if p.requires_grad)
    print(f"Model: {model_type}")
    print(f" Input size: {input_size}")
    print(f" Hidden size: {hidden_size}")
    print(f" Num layers: {num_layers}")
    print(f" Dropout: {dropout}")
    print(f" Total parameters: {total_params:,}")
    print(f" Trainable parameters: {trainable_params:,}")
    print(f" Device: {device}")
    return network
# ============================================================================
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
# ============================================================================
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
    """Build the rolling-window CI feature table used by Model 307.

    For each window in {7, 14, 21} days this derives: moving average,
    velocity (MA diff), acceleration (velocity diff), min/max/range,
    std and coefficient of variation. Remaining NaNs (from diff/std warm-up)
    are filled with 0. Column order matches the training pipeline.
    """
    windows = (7, 14, 21)
    # Rolling means feed several feature families; compute each once.
    ma = {w: ci_series.rolling(window=w, min_periods=1).mean() for w in windows}

    cols = {'CI_raw': ci_series}
    # State (moving averages)
    for w in windows:
        cols[f'{w}d_MA'] = ma[w]
    # Velocity (gradient of MA)
    for w in windows:
        cols[f'{w}d_velocity'] = ma[w].diff() / 1.0  # Simplified gradient
    # Acceleration (gradient of velocity)
    for w in windows:
        cols[f'{w}d_acceleration'] = ma[w].diff().diff()
    # Min, Max, Range
    for w in windows:
        roll = ci_series.rolling(window=w, min_periods=1)
        cols[f'{w}d_min'] = roll.min()
        cols[f'{w}d_max'] = roll.max()
        cols[f'{w}d_range'] = cols[f'{w}d_max'] - cols[f'{w}d_min']
    # Std and CV (epsilon keeps the division finite when the mean is ~0)
    for w in windows:
        std = ci_series.rolling(window=w, min_periods=1).std()
        cols[f'{w}d_std'] = std
        cols[f'{w}d_CV'] = std / (ma[w] + 1e-6)
    # DOY normalized (day-of-year proxy scaled by the training constant 450)
    if doy_series is not None:
        cols['DOY_normalized'] = doy_series / 450.0
    return pd.DataFrame(cols, index=ci_series.index).fillna(0)
def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: str = 'FitData') -> np.ndarray:
    """Return the requested feature columns as a (timesteps, features) array.

    Feature names not produced by compute_ci_features are silently dropped;
    a ValueError is raised only when none of the requested names match.
    """
    ci = data_df[ci_column].astype(float)
    doy = None
    if 'DOY_normalized' in feature_names:
        # Synthetic day counter: position within the sequence, wrapped at 365.
        doy = pd.Series(range(len(data_df)), index=data_df.index) % 365
    table = compute_ci_features(ci, doy)
    selected = [name for name in feature_names if name in table.columns]
    if not selected:
        raise ValueError(f"No valid features found. Requested: {feature_names}")
    return table[selected].values
# ============================================================================
# MAIN UTILITY FUNCTIONS
# ============================================================================
def load_model_and_config(model_dir: Path):
    """Load model, config, and scalers from a given directory.

    Searches `model_dir` and the current working directory for both the
    standard file names (config.json / model.pt / scalers.pkl) and the
    Model-307-specific names (model_config.json / model_307.pt /
    model_scalers.pkl); the first location where all three exist wins.

    Returns:
        (model, config, scalers): network in eval mode on the detected
        device, parsed config dict, and unpickled per-feature scalers.

    Raises:
        FileNotFoundError: when no candidate location has all three files.
    """
    cwd = Path.cwd()
    # Try different naming conventions
    candidates = [
        # Standard names
        (model_dir / "config.json", model_dir / "model.pt", model_dir / "scalers.pkl"),
        # Model 307 specific names
        (model_dir / "model_config.json", model_dir / "model_307.pt", model_dir / "model_scalers.pkl"),
        # CWD standard names
        (cwd / "config.json", cwd / "model.pt", cwd / "scalers.pkl"),
        # CWD Model 307 names
        (cwd / "model_config.json", cwd / "model_307.pt", cwd / "model_scalers.pkl"),
    ]
    config_file = model_file = scalers_file = None
    for cfg, mdl, scl in candidates:
        if cfg.exists() and mdl.exists() and scl.exists():
            config_file, model_file, scalers_file = cfg, mdl, scl
            print(f"Found model files in: {cfg.parent}")
            break
    if not (config_file and model_file and scalers_file):
        # Collect every missing path across all candidates for the error.
        missing = []
        for cfg, mdl, scl in candidates:
            if not cfg.exists():
                missing.append(str(cfg))
            if not mdl.exists():
                missing.append(str(mdl))
            if not scl.exists():
                missing.append(str(scl))
        raise FileNotFoundError(
            f"Missing model files. Checked multiple locations. Missing: {missing}"
        )
    # NOTE: the config file is .json but parsed with yaml.safe_load — valid,
    # since JSON is a subset of YAML.
    with open(config_file) as f:
        config = yaml.safe_load(f)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(
        model_type=config['model']['type'],
        input_size=len(config['features']),
        hidden_size=config['model']['hidden_size'],
        num_layers=config['model']['num_layers'],
        dropout=config['model']['dropout'],
        device=device
    )
    print(f"Loading weights from: {model_file}")
    # weights_only=False allows arbitrary pickled objects in the checkpoint —
    # acceptable only because the weights file is a trusted local artifact.
    model.load_state_dict(torch.load(model_file, map_location=device, weights_only=False))
    model.eval()
    # Scalers are unpickled as-is (same trust assumption as the weights).
    with open(scalers_file, 'rb') as f:
        scalers = pickle.load(f)
    return model, config, scalers
def load_harvest_data(data_file: Path) -> pd.DataFrame:
    """Load harvest data CSV.

    Thin wrapper around pd.read_csv that logs the source path and row count.
    """
    print(f"Loading data from: {data_file}")
    frame = pd.read_csv(data_file)
    print(f"Loaded {len(frame)} rows")
    return frame
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device):
    """
    Phase 1: Growing window detection with threshold crossing.

    Starting at `current_pos`, the window iloc[current_pos:window_end] grows
    one day at a time; each step runs the model on the window and checks the
    LAST timestep's detected probability. When it exceeds 0.5 on 3 consecutive
    days, the first of those 3 days is recorded as a harvest and scanning
    restarts the day after it.

    Args:
        field_data: single-field daily rows, positionally indexed (0..n-1).
        model / config / scalers: Model 307 artifacts (load_model_and_config).
        ci_column: name of the CI value column to featurize.
        device: torch device for inference.

    Returns:
        List of (harvest_date, harvest_idx) tuples; harvest_idx is the
        positional index into field_data.
    """
    harvest_dates = []
    current_pos = 0
    while current_pos < len(field_data):
        consecutive_above_threshold = 0
        for window_end in range(current_pos + 1, len(field_data) + 1):
            # window_end is an ABSOLUTE position (slice end), not an offset.
            window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
            try:
                features = extract_features(window_data, config['features'], ci_column=ci_column)
                # Apply per-feature scalers (training feature order).
                for fi, scaler in enumerate(scalers):
                    try:
                        features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
                    except Exception:
                        pass  # best-effort: leave feature unscaled on mismatch
                # Run model
                with torch.no_grad():
                    x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
                    imminent_probs, detected_probs = model(x_tensor)
                    detected_probs = detected_probs.squeeze(0).cpu().numpy()
                # Only the newest day matters: check the LAST timestep.
                last_prob = detected_probs[-1]
                if last_prob > 0.5:
                    consecutive_above_threshold += 1
                else:
                    consecutive_above_threshold = 0
                # Harvest detected: 3 consecutive days above threshold.
                if consecutive_above_threshold >= 3:
                    # BUGFIX: window_end is already absolute, so the harvest
                    # index is window_end - 3 (first of the 3 confirming
                    # days). The previous code added current_pos a second
                    # time, which pointed past the window for every harvest
                    # after the first one.
                    harvest_idx = window_end - 3
                    harvest_dates.append((field_data.iloc[harvest_idx]['Date'], harvest_idx))
                    # Reset to next day after harvest
                    current_pos = harvest_idx + 1
                    break
            except Exception:
                continue  # skip windows where featurization/inference fails
        else:
            # Window reached the end of the series without a detection: done.
            break
    return harvest_dates
def run_phase2_refinement(field_data, phase1_harvests, model, config, scalers, ci_column, device):
    """
    Phase 2: Refinement with ±40 day window.

    For each Phase 1 harvest, run the model once over a padded window and take
    the argmax of the detected probability as the refined harvest day.
    NOTE(review): the window actually spans (season start - 40 days) to
    (Phase 1 harvest + 40 days), i.e. the whole season padded by 40 days —
    broader than a strict ±40 around the harvest itself; confirm intent.

    Returns:
        List of (harvest_date, harvest_idx) tuples; on any failure the
        Phase 1 result for that harvest is kept unchanged.
    """
    refined_harvests = []
    # Positional indexing below relies on this reset (iloc position == label).
    field_data = field_data.sort_values('Date').reset_index(drop=True)
    for i, (phase1_harvest_date, phase1_idx) in enumerate(phase1_harvests):
        try:
            # Season start: dataset start for the first harvest, otherwise
            # the day after the previous Phase 1 harvest.
            if i == 0:
                season_start_date = field_data.iloc[0]['Date']
            else:
                prev_harvest_idx = phase1_harvests[i-1][1]
                season_start_idx = prev_harvest_idx + 1
                if season_start_idx >= len(field_data):
                    break
                season_start_date = field_data.iloc[season_start_idx]['Date']
            # Extract the padded window bounds.
            window_start_date = season_start_date - pd.Timedelta(days=40)
            window_end_date = phase1_harvest_date + pd.Timedelta(days=40)
            # Dates are sorted ascending, so `>= start` is a suffix of Trues:
            # idxmax() correctly yields the first in-window row.
            start_mask = field_data['Date'] >= window_start_date
            window_start_idx = int(start_mask.idxmax()) if start_mask.any() else 0
            # BUGFIX: `<= end` is a prefix of Trues, so idxmax() returned the
            # FIRST True (row 0), collapsing the window to a single row. The
            # exclusive end index is the count of dates <= window_end_date.
            end_mask = field_data['Date'] <= window_end_date
            window_end_idx = min(len(field_data), int(end_mask.sum()))
            if window_end_idx <= window_start_idx:
                # Degenerate window: keep the Phase 1 result unchanged.
                refined_harvests.append((phase1_harvest_date, phase1_idx))
                continue
            window_data = field_data.iloc[window_start_idx:window_end_idx].copy().reset_index(drop=True)
            # Extract features for full window
            features = extract_features(window_data, config['features'], ci_column=ci_column)
            # Apply per-feature scalers (training feature order).
            for fi, scaler in enumerate(scalers):
                try:
                    features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
                except Exception:
                    pass  # best-effort: leave feature unscaled on mismatch
            # Run model once on full window
            with torch.no_grad():
                x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
                imminent_probs, detected_probs = model(x_tensor)
                detected_probs = detected_probs.squeeze(0).cpu().numpy()
            # Refined harvest = day of peak detected probability in the window.
            refined_idx_in_window = int(np.argmax(detected_probs))
            refined_idx_global = window_start_idx + refined_idx_in_window
            refined_harvest_date = field_data.iloc[refined_idx_global]['Date']
            refined_harvests.append((refined_harvest_date, refined_idx_global))
        except Exception:
            # Any failure: fall back to the Phase 1 detection for this harvest.
            refined_harvests.append((phase1_harvest_date, phase1_idx))
    return refined_harvests
def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=None):
    """
    Two-step harvest detection for each field:
      1. Phase 1: Growing window with 3-day threshold confirmation
      2. Phase 2: ±40 day refinement with argmax

    Args:
        df: Daily interpolated CI data containing at least 'field', 'Date'
            and the CI column named by config['data']['ci_column'].
        model: Trained torch model with dual output heads (imminent, detected).
        config: Parsed model configuration dict.
        scalers: Per-feature fitted scalers applied before inference.
        device: Optional torch device; defaults to CUDA when available,
            otherwise CPU.

    Returns:
        List of dicts, one per detected harvest event, with keys:
        field, season, season_start_date, season_end_date, phase2_harvest_date.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    results = []
    ci_column = config['data']['ci_column']
    # Group by field and count total fields for progress reporting
    field_groups = list(df.groupby('field'))
    total_fields = len(field_groups)
    harvests_found = 0
    print(f" Processing {total_fields} fields...")
    for idx, (field, field_data) in enumerate(field_groups, 1):
        # Simple progress indicator.
        # Fix: the filled/empty bar glyphs were empty string literals, so the
        # bar always printed as "[]" padding-free; use visible block characters.
        pct = int((idx / total_fields) * 100)
        bar_length = 40
        filled = int((idx / total_fields) * bar_length)
        bar = "█" * filled + "░" * (bar_length - filled)
        print(f" [{bar}] {pct:3d}% ({idx}/{total_fields} fields)", end='\r')
        field_data = field_data.sort_values('Date').reset_index(drop=True)
        # Phase 1: Growing window detection
        phase1_harvests = run_phase1_growing_window(field_data, model, config, scalers, ci_column, device)
        if not phase1_harvests:
            continue
        # Phase 2: Refinement
        phase2_harvests = run_phase2_refinement(field_data, phase1_harvests, model, config, scalers, ci_column, device)
        # Store results: season i starts the day after the previous harvest
        for i, (harvest_date, harvest_idx) in enumerate(phase2_harvests):
            if i == 0:
                season_start_date = field_data.iloc[0]['Date']
            else:
                prev_harvest_idx = phase2_harvests[i-1][1]
                season_start_idx = prev_harvest_idx + 1
                if season_start_idx >= len(field_data):
                    break
                season_start_date = field_data.iloc[season_start_idx]['Date']
            season_end_date = harvest_date
            result = {
                'field': field,
                'season': i + 1,
                'season_start_date': season_start_date,
                'season_end_date': season_end_date,
                'phase2_harvest_date': harvest_date,
            }
            results.append(result)
            harvests_found += 1
    print()  # New line after progress bar
    print(f" ✓ Complete: Found {harvests_found} harvest events across {total_fields} fields")
    return results
def build_production_harvest_table(refined_results: List[Dict]) -> pd.DataFrame:
    """
    Build a DataFrame from refined results for the production pipeline.

    One row per field/season, with all date columns rendered as YYYY-MM-DD
    strings.

    Args:
        refined_results: List of dicts as produced by run_two_step_refinement
            (keys: field, season, season_start_date, season_end_date,
            phase2_harvest_date).

    Returns:
        DataFrame with formatted date columns; an empty DataFrame with the
        core columns when no results are given.
    """
    if not refined_results:
        print("WARNING: No refined results to build table")
        return pd.DataFrame(columns=['field', 'season', 'season_start_date', 'season_end_date'])
    # Build DataFrame
    df = pd.DataFrame(refined_results)
    # Format every date column actually present. The two-step refinement emits
    # 'phase2_harvest_date' (not 'phase1_harvest_date'), so unconditionally
    # formatting a fixed column name raised KeyError on real results.
    date_columns = ['season_start_date', 'season_end_date',
                    'phase1_harvest_date', 'phase2_harvest_date']
    for col in date_columns:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col]).dt.strftime('%Y-%m-%d')
    print(f"Built production table with {len(df)} field/season combinations")
    return df

BIN
python_app/model_307.pt Normal file

Binary file not shown.

View file

@ -0,0 +1,144 @@
{
"name": "307_dropout02_with_doy",
"description": "Production Model 307: LSTM-based harvest detection (Phase 3, minimal regularization)",
"model_info": {
"type": "LSTM",
"architecture": "Unidirectional LSTM with dual output heads (imminent + detected)",
"total_parameters": 105120,
"input_features": 14,
"hidden_units": 256,
"output_heads": 2,
"training_data": "Historical multi-season CI data from multiple estates",
"validation_method": "5-fold cross-validation",
"device": "GPU (CUDA) or CPU fallback"
},
"production_scripts": {
"baseline": {
"script": "01_harvest_baseline_prediction.py",
"frequency": "Run ONCE during setup",
"purpose": "Predict all harvest dates (ground truth baseline)",
"input": "ci_data_for_python.csv (complete historical data)",
"output": "harvest_production_export.xlsx",
"time_estimate": "5-30 minutes depending on data volume"
},
"monitoring": {
"script": "02_harvest_imminent_weekly.py",
"frequency": "Run WEEKLY (or daily if required)",
"purpose": "Real-time harvest status and imminent alerts",
"input": "ci_data_for_python.csv (recent data)",
"output": "harvest_imminent_weekly.csv",
"time_estimate": "1-5 minutes"
}
},
"features": [
"CI_raw",
"7d_MA",
"14d_MA",
"21d_MA",
"7d_velocity",
"14d_velocity",
"21d_velocity",
"7d_min",
"14d_min",
"21d_min",
"7d_std",
"14d_std",
"21d_std",
"DOY_normalized"
],
"model": {
"type": "LSTM",
"hidden_size": 256,
"num_layers": 1,
"dropout": 0.2
},
"training": {
"imminent_days_before": 28,
"imminent_days_before_end": 1,
"detected_days_after_start": 1,
"detected_days_after_end": 21,
"k_folds": 5,
"num_epochs": 150,
"patience": 20,
"learning_rate": 0.001,
"batch_size": 4
},
"data": {
"csv_path": "../lstm_complete_data.csv",
"ci_column": "FitData",
"test_fraction": 0.15,
"seed": 42
},
"workflow_instructions": {
"overview": "Model 307 uses a two-script approach: baseline setup + weekly monitoring",
"step_1_baseline": {
"description": "Establish historical harvest date reference for all fields",
"script": "01_harvest_baseline_prediction.py",
"when": "Run once after setting up CI extraction pipeline",
"command": "conda activate python_gpu && python 01_harvest_baseline_prediction.py",
"input_data": "ci_data_for_python.csv (all available historical CI data)",
"output_file": "harvest_production_export.xlsx (ground truth baseline)",
"columns": [
"field - Field ID",
"sub_field - Sub-field designation",
"season - Season number (1, 2, 3...)",
"year - Year of harvest",
"season_start_date - Start of growing season",
"season_end_date - End of season (harvest date)",
"phase2_harvest_date - Refined harvest prediction (Phase 2 argmax)"
],
"notes": "This becomes your reference - compare all weekly monitoring against this"
},
"step_2_monitoring": {
"description": "Weekly real-time harvest status and imminent alerts",
"script": "02_harvest_imminent_weekly.py",
"when": "Run every week (e.g., Mondays) or daily if near harvest",
"command": "conda activate python_gpu && python 02_harvest_imminent_weekly.py",
"input_data": "ci_data_for_python.csv (latest CI data from 02b conversion)",
"output_file": "harvest_imminent_weekly.csv",
"columns": [
"field - Field ID",
"sub_field - Sub-field designation",
"imminent_prob - Likelihood of harvest readiness in next 28 days (0.0-1.0)",
"detected_prob - Current harvest probability (0.0-1.0)",
"week - ISO week number",
"year - Year",
"as_of_date - Latest date in dataset",
"num_days - Days of history used"
],
"alert_thresholds": {
"imminent_high": "imminent_prob > 0.7 (prepare harvest)",
"imminent_medium": "imminent_prob 0.5-0.7 (monitor closely)",
"detected_high": "detected_prob > 0.6 (active harvesting)"
}
},
"integration_with_r_pipeline": {
"before_model_307": [
"Planet 8-band download: download_8band_pu_optimized.py",
"CI extraction: 02_ci_extraction.R",
"Convert to CSV: 02b_convert_rds_to_csv.R (outputs ci_data_for_python.csv)"
],
"model_307_here": [
"BASELINE: 01_harvest_baseline_prediction.py (run once)",
"MONITORING: 02_harvest_imminent_weekly.py (run weekly)"
],
"after_model_307": [
"Field analysis: 09b_field_analysis_weekly.R (reads harvest predictions)",
"Reports: 10_CI_report_with_kpis.Rmd (includes harvest status)"
]
},
"environment_requirements": {
"python_env": "python_gpu",
"activation": "conda activate python_gpu",
"required_packages": [
"torch (GPU-enabled)",
"pandas",
"numpy",
"scikit-learn",
"pyyaml",
"openpyxl"
],
"gpu": "NVIDIA GPU with CUDA (optional - falls back to CPU if unavailable)"
}
}
}

Binary file not shown.

View file

@ -15,9 +15,113 @@
suppressPackageStartupMessages({
library(tidyverse)
library(lubridate)
library(zoo)
library(here)
})
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#' Reshape wide-format CI data into long format
#'
#' @param ci_data_wide Tibble with columns field, sub_field, and one column per date
#' @return Long-format tibble: field, sub_field, Date, FitData
wide_to_long_ci_data <- function(ci_data_wide) {
  long_data <- ci_data_wide %>%
    pivot_longer(
      cols = -c(field, sub_field),
      names_to = "Date",
      values_to = "FitData",
      values_drop_na = TRUE
    )
  long_data %>%
    mutate(
      Date = as.Date(Date),
      FitData = as.numeric(FitData)
    ) %>%
    filter(!is.na(FitData))
}
#' Build complete daily CI sequences per field with linear interpolation
#'
#' For each field/sub_field combination, expands the measurements onto a full
#' daily calendar from the first to the last observed date, places the raw
#' values on that calendar, and linearly interpolates the gaps.
#'
#' @param ci_data_long Long-format tibble: field, sub_field, Date, FitData
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
create_interpolated_daily_sequences <- function(ci_data_long) {
  # Expand one field's measurements to a daily grid and interpolate.
  interpolate_one_field <- function(measurements) {
    measurements <- measurements %>% arrange(Date)
    # Complete daily calendar spanning the observation window
    all_days <- seq(min(measurements$Date), max(measurements$Date), by = "day")
    daily <- tibble(
      Date = all_days,
      value = NA_real_,
      FitData = NA_real_,
      DOY = seq_along(all_days)  # Continuous day counter: 1, 2, 3, ...
    )
    # Drop the raw measurements onto their matching calendar days
    for (row in seq_len(nrow(measurements))) {
      hit <- which(daily$Date == measurements$Date[row])
      if (length(hit) > 0) {
        daily$value[hit] <- measurements$FitData[row]
      }
    }
    # Linear interpolation across gaps; leading/trailing NAs are preserved
    daily$FitData <- zoo::na.approx(daily$value, na.rm = FALSE)
    daily
  }

  ci_data_long %>%
    group_by(field, sub_field) %>%
    nest() %>%
    mutate(data = map(data, interpolate_one_field)) %>%
    unnest(data) %>%
    select(field, sub_field, Date, FitData, DOY, value) %>%
    arrange(field, Date)
}
#' Print a validation summary for the converted CI data
#'
#' @param ci_data_python Tibble holding the converted CI data
#' @return The input tibble, invisibly (so the call can be piped)
validate_conversion_output <- function(ci_data_python) {
  n_raw <- sum(!is.na(ci_data_python$value))
  n_interpolated <- sum(is.na(ci_data_python$value) & !is.na(ci_data_python$FitData))
  cat(sprintf("\nValidation:\n"))
  cat(sprintf(" Unique fields: %d\n", n_distinct(ci_data_python$field)))
  cat(sprintf(" Total daily rows: %d\n", nrow(ci_data_python)))
  cat(sprintf(" Date range: %s to %s\n",
              min(ci_data_python$Date, na.rm = TRUE),
              max(ci_data_python$Date, na.rm = TRUE)))
  cat(sprintf(" FitData range: %.2f to %.2f\n",
              min(ci_data_python$FitData, na.rm = TRUE),
              max(ci_data_python$FitData, na.rm = TRUE)))
  cat(sprintf(" Raw measurements: %d\n", n_raw))
  cat(sprintf(" Interpolated values: %d\n", n_interpolated))
  invisible(ci_data_python)
}
#' Show the follow-up steps for the Python harvest-detection stage
print_next_steps <- function() {
  steps <- c(
    " 1. Read this CSV file in Python\n",
    " 2. Group by field to identify seasons\n",
    " 3. Run LSTM model to detect harvest dates\n",
    " 4. Save predicted harvest dates to Excel\n",
    " 5. Use output in script 03 for interpolation\n"
  )
  cat("\nNext steps for Python harvest detection:\n")
  for (step in steps) {
    cat(step)
  }
}
# ============================================================================
# MAIN FUNCTION
# ============================================================================
main <- function() {
# Process command line arguments
args <- commandArgs(trailingOnly = TRUE)
@ -28,7 +132,7 @@ main <- function() {
} else if (exists("project_dir", envir = .GlobalEnv)) {
project_dir <- get("project_dir", envir = .GlobalEnv)
} else {
project_dir <- "esa"
project_dir <- "angata"
}
# Make available globally
@ -49,9 +153,17 @@ main <- function() {
})
# Define paths
ci_data_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
input_file <- file.path(ci_data_dir, "combined_CI_data.rds")
output_file <- file.path(ci_data_dir, "ci_data_for_python.csv")
ci_data_source_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
ci_data_output_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "ci_data_for_python")
# Create output directory if it doesn't exist (for new projects)
if (!dir.exists(ci_data_output_dir)) {
dir.create(ci_data_output_dir, recursive = TRUE, showWarnings = FALSE)
cat(sprintf("✓ Created output directory: %s\n", ci_data_output_dir))
}
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds")
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
# Check if input file exists
if (!file.exists(input_file)) {
@ -61,52 +173,32 @@ main <- function() {
cat(sprintf("Loading: %s\n", input_file))
# Load RDS file
ci_data <- readRDS(input_file) %>%
ci_data_wide <- readRDS(input_file) %>%
as_tibble()
cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide)))
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n"))
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
# Prepare data for Python
ci_data_python <- ci_data %>%
# Ensure standard column names
rename(
field = field,
sub_field = sub_field,
Date = Date,
FitData = FitData,
DOY = DOY
) %>%
# Add 'value' as an alias for FitData (sometimes needed)
mutate(value = FitData) %>%
# Keep only necessary columns
select(field, sub_field, Date, FitData, DOY, value) %>%
# Sort by field and date
arrange(field, Date)
# Step 1: Convert from WIDE to LONG format
cat("\nStep 1: Converting from wide to long format...\n")
ci_data_long <- wide_to_long_ci_data(ci_data_wide)
# Validate data
cat(sprintf("\nValidation:\n"))
cat(sprintf(" Unique fields: %d\n", n_distinct(ci_data_python$field)))
cat(sprintf(" Date range: %s to %s\n",
min(ci_data_python$Date, na.rm = TRUE),
max(ci_data_python$Date, na.rm = TRUE)))
cat(sprintf(" FitData range: %.2f to %.2f\n",
min(ci_data_python$FitData, na.rm = TRUE),
max(ci_data_python$FitData, na.rm = TRUE)))
cat(sprintf(" Missing FitData: %d rows\n", sum(is.na(ci_data_python$FitData))))
# Step 2: Create complete daily sequences with interpolation
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
# Save to CSV
cat(sprintf("\nSaving to: %s\n", output_file))
# Step 3: Validate output
cat("\nStep 3: Validating output...")
validate_conversion_output(ci_data_python)
# Step 4: Save to CSV
cat(sprintf("\nStep 4: Saving to CSV...\n"))
cat(sprintf(" Output: %s\n", output_file))
write_csv(ci_data_python, output_file)
cat(sprintf("✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
cat("\nNext steps for Python harvest detection:\n")
cat(" 1. Read this CSV file in Python\n")
cat(" 2. Group by field to identify seasons\n")
cat(" 3. Run LSTM model to detect harvest dates\n")
cat(" 4. Save predicted harvest dates to Excel\n")
cat(" 5. Use output in script 03 for interpolation\n")
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
print_next_steps()
}
if (sys.nframe() == 0) {

1328
webapps/geojson_viewer.html Normal file

File diff suppressed because it is too large Load diff

View file

@ -212,6 +212,22 @@
<a href="./data_validation_tool/" class="app-btn">Open Tool</a>
</div>
</div>
<!-- GeoJSON Viewer -->
<div class="app-card">
<div class="app-icon">📍</div>
<div class="app-content">
<h2>GeoJSON Viewer</h2>
<p>Upload and visualize GeoJSON files on an interactive map with feature properties.</p>
<ul class="app-features">
<li>Upload GeoJSON files</li>
<li>Interactive map view</li>
<li>View feature properties</li>
<li>Download exports</li>
</ul>
<a href="./geojson_viewer.html" class="app-btn">Open Viewer</a>
</div>
</div>
</div>
<footer>