#!/usr/bin/env python3
"""
Quick test: Verify feature extraction works.

Debug script that:
  1. loads the model, its config and scalers via ``load_model_and_config``,
  2. reads the CI time-series CSV for the configured project,
  3. selects one field and prints a short summary of its data,
  4. runs ``extract_features`` on the first 50 rows of that field,
  5. runs ``extract_features`` on growing windows (10/20/30/50 rows).

Steps 4 and 5 are best-effort: any exception is printed with its traceback
and the script carries on, so one failure does not hide later output.

NOTE: the CSV path and the model directory are relative paths, so they are
resolved against the current working directory, not this file's location.
"""

import sys
import traceback
from pathlib import Path

import numpy as np
import pandas as pd

# Make the sibling helper module importable regardless of how the script is
# launched; this must run before the project import below.
sys.path.insert(0, str(Path(__file__).parent))

from harvest_date_pred_utils import extract_features, load_model_and_config

project_name = "angata"
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"

# Status glyphs written as escapes so they survive a bad re-encoding of this
# file (the previous copy had them corrupted into mojibake).
CHECK_MARK = "\u2713"
CROSS_MARK = "\u2717"

print("=" * 80)
print("DEBUG: Feature Extraction Test")
print("=" * 80)

# Load model config
print("\n[1] Loading model config...")
# Path(".") is the current working directory; presumably the model artifacts
# are expected there -- confirm against load_model_and_config.
model, config, scalers = load_model_and_config(Path("."))
feature_names = config['features']
print(f" Config features: {feature_names}")
print(f" Number of features: {len(feature_names)}")

# Load CI data
print("\n[2] Loading CI data...")
# Read 'field' as str so it can be compared with the string test_field below.
ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
ci_data['Date'] = pd.to_datetime(ci_data['Date'])
print(f" Columns: {ci_data.columns.tolist()}")
print(f" Total rows: {len(ci_data)}")

# Test on a single field
test_field = "1"
field_data = (
    ci_data[ci_data['field'] == test_field]
    .sort_values('Date')
    .reset_index(drop=True)
)
print(f"\n[3] Testing on field {test_field}...")
print(f" Data points: {len(field_data)}")
print(f" Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}")
print(f" Columns in field data: {field_data.columns.tolist()}")
print(" Sample values:")
print(field_data[['Date', 'value']].head())

# Test feature extraction on first 50 days
print("\n[4] Extracting features for first 50 days...")
try:
    subset = field_data.iloc[:50].copy()
    features = extract_features(subset, feature_names, ci_column='value')
    print(f" {CHECK_MARK} Success!")
    print(f" Feature shape: {features.shape}")
    # Use the real row count: a field with fewer than 50 rows yields a
    # shorter subset, so a literal 50 would report a bogus expectation.
    print(f" Expected shape: ({len(subset)}, {len(feature_names)})")
    print(" Feature values sample (first 5 days):")
    for day, row in enumerate(features[:5]):
        print(f" Day {day}: {row}")
except Exception as e:  # best-effort debug run: report and keep going
    print(f" {CROSS_MARK} Error: {e}")
    traceback.print_exc()

print("\n[5] Testing on growing windows...")
try:
    for window_size in [10, 20, 30, 50]:
        window_data = field_data.iloc[:window_size].copy()
        features = extract_features(window_data, feature_names, ci_column='value')
        print(f" Window size {window_size}: shape={features.shape}, min={features.min():.4f}, max={features.max():.4f}")
except Exception as e:  # best-effort debug run: report and keep going
    print(f" {CROSS_MARK} Error: {e}")
    traceback.print_exc()

# Printed unconditionally: it marks the end of the run, not success of
# every step above.
print(f"\n{CHECK_MARK} Feature extraction test complete")