#!/usr/bin/env python3
"""
Debug script: Test if script 22 logic is working.

Re-implements the Phase-1 growing-window harvest detection with
instrumentation (counts model forward passes) and runs it on a single
known field, then reports each detected harvest with the probability the
model assigns at that index on a full-sequence pass.
"""
import sys
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch

sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from harvest_date_pred_utils import (
    load_model_and_config,
    extract_features,
    run_phase1_growing_window,  # reference implementation; instrumented copy below
)

project_name = "angata"

# Find the workspace root by walking upward until a "laravel_app" folder is found.
script_dir = Path(__file__).parent
root = script_dir
while root != root.parent:
    if (root / "laravel_app").exists():
        break
    root = root.parent

base_storage = root / "laravel_app" / "storage" / "app" / project_name / "Data"
CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
MODEL_DIR = root / "python_app"

print("=" * 80)
print("DEBUG: Script 22 Two-Step Refinement Logic")
print("=" * 80)

# [1] Load model + per-feature scalers; use GPU when available.
print("\n[1] Loading model...")
model, config, scalers = load_model_and_config(MODEL_DIR)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" Device: {device}")
print(f" Model features: {config['features']}")

# [2] Load the CI time series (one row per field per date).
print("\n[2] Loading CI data...")
ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
ci_data['Date'] = pd.to_datetime(ci_data['Date'])
print(f" Total rows: {len(ci_data)}")
print(f" Fields: {ci_data['field'].nunique()}")
print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}")

# [3] Restrict to a known test field (field 779 from our previous tests).
test_field = "779"
field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True)
print(f"\n[3] Testing on field {test_field}...")
print(f" Data points: {len(field_data)}")
print(f" Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}")
if len(field_data) == 0:
    print(f" ERROR: No data for field {test_field}")
    sys.exit(1)

# [4] Sanity-check feature extraction on the full series (also reused in step 5
#     for the full-sequence probability display).
print(f"\n[4] Extracting features for field {test_field}...")
try:
    features = extract_features(field_data.reset_index(drop=True), config['features'], ci_column='value')
    print(f" Features shape: {features.shape}")
    print(f" Features dtype: {features.dtype}")
except Exception as e:
    print(f" ERROR: Could not extract features: {e}")
    sys.exit(1)

# [5] Instrumented growing-window run (simulates production: one model forward
#     pass per expanding window, so roughly one run per day of data).
print(f"\n[5] Running Phase 1 GROWING WINDOW method (threshold=0.5, consecutive=3)...")
print(f" This simulates real production: expanding windows, checking each day")
print(f" Expected: ~477 model runs for 477 days (SLOW)")

start_time = time.time()


def instrumented_run(field_data, model, config, scalers, ci_column, device,
                     threshold=0.3, consecutive_days=2):
    """Growing-window Phase-1 detection with a model-run counter.

    For each start position, grow the window one sample at a time; when the
    model's "detected" probability at the window's last timestep exceeds
    ``threshold`` for ``consecutive_days`` windows in a row, record a harvest
    at the first of those timesteps and restart just after it.

    Parameters mirror ``run_phase1_growing_window``:
        field_data       : per-field DataFrame, sorted by 'Date', fresh 0..n index
        model            : torch model -> (imminent_probs, detected_probs)
        config           : dict with 'features' (ordered feature names)
        scalers          : one fitted scaler per feature, same order
        ci_column        : name of the CI value column (e.g. 'value')
        device           : torch device for inference
        threshold        : detection probability threshold (default 0.3)
        consecutive_days : required consecutive above-threshold windows (default 2)

    Returns a list of (harvest_date, harvest_index) tuples.
    """
    harvest_dates = []
    current_pos = 0
    model_runs = 0

    print(f" Starting growing window loop...")
    while current_pos < len(field_data):
        consecutive_above_threshold = 0
        for window_end in range(current_pos + 1, len(field_data) + 1):
            window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
            try:
                window_features = extract_features(window_data, config['features'], ci_column=ci_column)
                features_scaled = window_features.copy().astype(float)
                for fi, scaler in enumerate(scalers):
                    try:
                        features_scaled[:, fi] = scaler.transform(
                            window_features[:, fi].reshape(-1, 1)
                        ).flatten()
                    except Exception as e:
                        raise ValueError(f"Scaler {fi} failed: {e}")

                with torch.no_grad():
                    x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device)
                    _, detected_probs = model(x_tensor)
                model_runs += 1

                last_prob = detected_probs[0, -1].item()
                if last_prob > threshold:
                    consecutive_above_threshold += 1
                else:
                    consecutive_above_threshold = 0

                if consecutive_above_threshold >= consecutive_days:
                    # BUGFIX: window_end is already an ABSOLUTE index into
                    # field_data (the range starts at current_pos + 1), so the
                    # detection index must not add current_pos again. The old
                    # `current_pos + window_end - consecutive_days` double-counted
                    # current_pos, mis-dating every harvest after the first.
                    harvest_idx = window_end - consecutive_days
                    harvest_dates.append((field_data.iloc[harvest_idx]['Date'], harvest_idx))
                    current_pos = harvest_idx + 1
                    break
            except Exception as e:
                # Debug script: surface per-window failures instead of
                # silently swallowing them (the old `pass` hid real errors).
                print(f" WARNING: window [{current_pos}:{window_end}] failed: {e}")
        else:
            # Inner loop exhausted without a detection -> no more harvests.
            break

    print(f" Model runs performed: {model_runs}")
    return harvest_dates


phase1_results = instrumented_run(
    field_data.reset_index(drop=True), model, config, scalers, 'value', device,
    threshold=0.5, consecutive_days=3
)

elapsed = time.time() - start_time
print(f"\n Time elapsed: {elapsed:.2f}s")

if phase1_results:
    print(f" ✓ Phase 1 detected {len(phase1_results)} harvest(s):")
    # One full-sequence forward pass just to display per-index probabilities
    # (reuses the features extracted in step [4]).
    with torch.no_grad():
        X = features.reshape(1, -1, len(config['features']))
        X_normalized = np.zeros_like(X)
        for fi, scaler in enumerate(scalers):
            X_normalized[0, :, fi] = scaler.transform(X[0, :, fi].reshape(-1, 1)).flatten()
        X_tensor = torch.from_numpy(X_normalized).float().to(device)
        _, detected_probs = model(X_tensor)
        detected_np = detected_probs[0].cpu().numpy()
    for harvest_date, harvest_idx in phase1_results:
        # Guard against an index past the sequence end (shouldn't happen now
        # that the index math is fixed, but keep the display robust).
        prob = detected_np[harvest_idx] if harvest_idx < len(detected_np) else 0.0
        print(f" {harvest_date.date()}: index {harvest_idx}, probability={prob:.4f}")
else:
    print(f" ✗ Phase 1: No harvest detected")