SmartCane/python_app/harvest_detection_experiments/tests/test_feature_extraction.py
Timon fabbf3214d Enhance harvest detection logic and testing framework
- Updated `detect_mosaic_mode` function to check for grid-size subdirectories in addition to tile-named files.
- Added comprehensive tests for DOY reset logic in `test_doy_logic.py`.
- Implemented feature extraction tests in `test_feature_extraction.py`.
- Created tests for growing window method in `test_growing_window_only.py`.
- Developed a complete model inference test in `test_model_inference.py`.
- Added a debug script for testing two-step refinement logic in `test_script22_debug.py`.
2026-01-15 14:30:54 +01:00

74 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""
Quick test: Verify feature extraction works.

Loads the model config via ``load_model_and_config``, reads the CI CSV of the
configured project, and runs ``extract_features`` for a single field: once on
the first 50 rows and then on a series of growing windows.  Progress is
printed to stdout.

The script exits with status 1 when the test field has no rows or when any
extraction step raised, so a shell or CI wrapper can rely on the exit code
instead of parsing the output.
"""
import sys
import traceback
import pandas as pd
import numpy as np
from pathlib import Path

# Put this script's directory on sys.path before the project-local import
# below; the order of these two statements matters.
sys.path.insert(0, str(Path(__file__).parent))
from harvest_date_pred_utils import extract_features, load_model_and_config

project_name = "angata"
# NOTE: relative path, so it is resolved against the current working
# directory, not against the location of this script.
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"

# Names of the steps that raised; drives the final message and exit status.
failed_steps = []

print("="*80)
print("DEBUG: Feature Extraction Test")
print("="*80)

# Load model config
print("\n[1] Loading model config...")
model, config, scalers = load_model_and_config(Path("."))
print(f" Config features: {config['features']}")
print(f" Number of features: {len(config['features'])}")

# Load CI data
print("\n[2] Loading CI data...")
# Read 'field' as text so identifiers compare equal to the string test_field.
ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
ci_data['Date'] = pd.to_datetime(ci_data['Date'])
print(f" Columns: {ci_data.columns.tolist()}")
print(f" Total rows: {len(ci_data)}")

# Test on a single field
test_field = "1"
field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True)
print(f"\n[3] Testing on field {test_field}...")
if field_data.empty:
    # Nothing to extract from; fail loudly rather than exercising
    # extract_features on an empty frame.
    print(f" ✗ Error: no rows found for field {test_field}")
    sys.exit(1)
print(f" Data points: {len(field_data)}")
print(f" Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}")
print(f" Columns in field data: {field_data.columns.tolist()}")
print(" Sample values:")
print(field_data[['Date', 'value']].head())

# Test feature extraction on first 50 days
print("\n[4] Extracting features for first 50 days...")
try:
    subset = field_data.iloc[:50].copy()
    features = extract_features(subset, config['features'], ci_column='value')
    print(" ✓ Success!")
    print(f" Feature shape: {features.shape}")
    # The field may hold fewer than 50 rows, so report the real row count.
    print(f" Expected shape: ({len(subset)}, {len(config['features'])})")
    print(" Feature values sample (first 5 days):")
    for i in range(min(5, features.shape[0])):
        print(f" Day {i}: {features[i]}")
except Exception as e:
    # Keep going so step 5 still runs, but remember the failure.
    failed_steps.append("[4]")
    print(f" ✗ Error: {e}")
    traceback.print_exc()

print("\n[5] Testing on growing windows...")
try:
    for window_size in [10, 20, 30, 50]:
        window_data = field_data.iloc[:window_size].copy()
        features = extract_features(window_data, config['features'], ci_column='value')
        print(f" Window size {window_size}: shape={features.shape}, min={features.min():.4f}, max={features.max():.4f}")
except Exception as e:
    failed_steps.append("[5]")
    print(f" ✗ Error: {e}")
    traceback.print_exc()

if failed_steps:
    # Do not claim success when an extraction step raised.
    print(f"\n✗ Feature extraction test failed in step(s): {', '.join(failed_steps)}")
    sys.exit(1)
print("\n✓ Feature extraction test complete")