diff --git a/python_app/22_harvest_baseline_prediction.py b/python_app/22_harvest_baseline_prediction.py index ffd8f14..f39dca6 100644 --- a/python_app/22_harvest_baseline_prediction.py +++ b/python_app/22_harvest_baseline_prediction.py @@ -26,7 +26,9 @@ This is your GROUND TRUTH - compare all future predictions against this baseline Usage: python 01_harvest_baseline_prediction.py [project_name] - conda activate pytorch_gpu; cd "C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\python_app"; python 22_harvest_baseline_prediction.py angata + conda activate pytorch_gpu + cd python_app + python 22_harvest_baseline_prediction.py angata Examples: python 01_harvest_baseline_prediction.py angata @@ -108,7 +110,7 @@ def main(): # [3/4] Run model predictions with two-step detection print("\n[3/4] Running two-step harvest detection...") - print(" (Using threshold=0.3, consecutive_days=2 - tuned for Model 307 output)") + print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)") refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device, phase1_threshold=0.3, phase1_consecutive=2) diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py index ebf77c8..aa4199c 100644 --- a/python_app/harvest_date_pred_utils.py +++ b/python_app/harvest_date_pred_utils.py @@ -184,18 +184,47 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p return features.fillna(0) -def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: str = 'FitData') -> np.ndarray: - """Extract and return specified features as numpy array.""" +def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: str = 'FitData', + season_anchor_day: int = None, lookback_start: int = 0) -> np.ndarray: + """ + Extract and return specified features as numpy array. + + Args: + data_df: DataFrame with Date and CI data (may be a window after a harvest) + feature_names: List of feature names to extract + ci_column: Name of CI column + season_anchor_day: Day in FULL sequence where this season started (for DOY reset) + DOY will be recalculated as: 1, 2, 3, ... from this point + lookback_start: Starting index in original full data (for season reset calculation) + + Returns: + NumPy array of shape (len(data_df), len(feature_names)) + """ # Compute all CI features ci_series = data_df[ci_column].astype(float) - doy_series = pd.Series(range(len(data_df)), index=data_df.index) % 365 if 'DOY_normalized' in feature_names else None + + # Compute DOY (age/days since season start) - NOT day-of-year! + # DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) + # It only resets to 1 after a harvest is detected (new season) + doy_series = None + if 'DOY_normalized' in feature_names: + if season_anchor_day is not None and lookback_start >= season_anchor_day: + # Season was reset after harvest. Recalculate DOY as simple counter from 1 + # This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ... + doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + elif 'DOY' in data_df.columns: + # Use DOY directly from CSV - already calculated as continuous age counter + doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index) + else: + # Fallback: create continuous age counter (1, 2, 3, ...) + doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) all_features = compute_ci_features(ci_series, doy_series) # Select requested features requested = [f for f in feature_names if f in all_features.columns] if not requested: - raise ValueError(f"No valid features found. Requested: {feature_names}") + raise ValueError(f"No valid features found. Requested: {feature_names}, Available: {all_features.columns.tolist()}") return all_features[requested].values @@ -274,42 +303,63 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame: def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device, threshold=0.45, consecutive_days=2): """ - Phase 1: Growing window detection with threshold crossing. - Expand window day-by-day, check last timestep's detected_prob. - When N consecutive days have prob > threshold, harvest detected. + Phase 1: Growing window detection with DOY season reset. + + For each detected harvest, reset DOY counter for the next season. + This allows the model to detect multiple consecutive harvests in multi-year data. + + Algorithm: + 1. Start with season_anchor_day = 0 (DOY 1 at day 0) + 2. Expand window: [0:1], [0:2], [0:3], ... until threshold crossed + 3. When harvest detected: record date, set new season_anchor = day after harvest + 4. Continue from next season start Args: - threshold (float): Probability threshold (default 0.45, tuned for Model 307) - consecutive_days (int): Required consecutive days above threshold (default 2, reduced from 3 for robustness) + threshold (float): Probability threshold (default 0.45) + consecutive_days (int): Required consecutive days above threshold (default 2) Returns list of (harvest_date, harvest_idx) tuples. """ harvest_dates = [] + season_anchor_day = 0 # DOY 1 starts at day 0 current_pos = 0 while current_pos < len(field_data): consecutive_above_threshold = 0 + min_window_size = 120 # Need at least 120 days (~4 months) for patterns to establish for window_end in range(current_pos + 1, len(field_data) + 1): window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True) + # Skip if window is too small (model needs long sequences for pattern learning) + if len(window_data) < min_window_size: + continue + try: - features = extract_features(window_data, config['features'], ci_column=ci_column) + # CRITICAL: Pass season_anchor_day so DOY resets after harvest + features = extract_features( + window_data, + config['features'], + ci_column=ci_column, + season_anchor_day=season_anchor_day, + lookback_start=current_pos + ) # Apply scalers + features_scaled = features.copy().astype(float) for fi, scaler in enumerate(scalers): try: - features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten() + features_scaled[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten() except Exception: pass - # Run model + # Run model on expanding window with torch.no_grad(): - x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device) + x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device) imminent_probs, detected_probs = model(x_tensor) - # Check LAST timestep - last_prob = detected_probs[-1] + # Check LAST timestep only + last_prob = detected_probs[0, -1].item() if last_prob > threshold: consecutive_above_threshold += 1 @@ -318,16 +368,21 @@ def run_phase1_growing_window(field_data, model, config, scalers, ci_column, dev # Harvest detected: N consecutive days above threshold if consecutive_above_threshold >= consecutive_days: - harvest_date = field_data.iloc[current_pos + window_end - consecutive_days]['Date'] - harvest_dates.append((harvest_date, current_pos + window_end - consecutive_days)) + harvest_idx = current_pos + window_end - consecutive_days + harvest_date = field_data.iloc[harvest_idx]['Date'] + harvest_dates.append((harvest_date, harvest_idx)) - # Reset to next day after harvest - current_pos = current_pos + window_end - consecutive_days + 1 + # CRITICAL: Reset season anchor for next season + # DOY 1 starts at day after harvest + season_anchor_day = harvest_idx + 1 + current_pos = harvest_idx + 1 break - except Exception: + except Exception as e: + # Skip window on error continue else: + # No more harvests found break return harvest_dates @@ -413,7 +468,9 @@ def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=Non device = torch.device("cuda" if torch.cuda.is_available() else "cpu") results = [] - ci_column = config['data']['ci_column'] + # CI column is 'FitData' (interpolated CI) - NOT 'value' (raw with NAs) + # 'FitData' is already gap-filled by R stage 03, ready for ML + ci_column = 'FitData' # Group by field and count total fields for progress field_groups = list(df.groupby('field')) diff --git a/python_app/harvest_detection_experiments/tests/test_doy_logic.py b/python_app/harvest_detection_experiments/tests/test_doy_logic.py new file mode 100644 index 0000000..dc4edb6 --- /dev/null +++ b/python_app/harvest_detection_experiments/tests/test_doy_logic.py @@ -0,0 +1,65 @@ +""" +Test DOY reset logic for harvest detection. +Verify that DOY resets to 1, 2, 3, ... after harvest is detected. +""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path.cwd())) + +import pandas as pd +import numpy as np +from harvest_date_pred_utils import extract_features, load_model_and_config +import torch + +# Load sample data +ci_data = pd.read_csv('../laravel_app/storage/app/angata/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv') +ci_data['Date'] = pd.to_datetime(ci_data['Date']) + +# Get field 779 data +field_779 = ci_data[ci_data['field'] == '779'].reset_index(drop=True) +print(f"Field 779: {len(field_779)} days of data") +print(f"Date range: {field_779['Date'].min().date()} to {field_779['Date'].max().date()}\n") + +# Load model config +model, config, scalers = load_model_and_config(Path.cwd()) + +# Test 1: First season (season_anchor_day = 0) +print("=" * 80) +print("TEST 1: First season (season_anchor_day=0, lookback_start=0)") +print("=" * 80) +window = field_779.iloc[0:20].copy().reset_index(drop=True) +features = extract_features(window, config['features'], ci_column='FitData', + season_anchor_day=0, lookback_start=0) + +# Extract DOY_normalized column (index 13 or find it) +feature_names = config['features'] +doy_idx = feature_names.index('DOY_normalized') if 'DOY_normalized' in feature_names else -1 +if doy_idx >= 0: + doy_values = (features[:, doy_idx] * 450).astype(int) # Denormalize + print(f"Window size: {len(window)} days") + print(f"DOY values: {doy_values[:10]}") + print(f"Expected: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]") + assert list(doy_values[:10]) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "DOY not incrementing correctly!" + print("✓ DOY incrementing correctly for first season\n") + +# Test 2: After harvest detected at day 100, next season starts +print("=" * 80) +print("TEST 2: After harvest at day 100, new season starts (season_anchor_day=101, lookback_start=101)") +print("=" * 80) +harvest_day = 100 +window = field_779.iloc[harvest_day:harvest_day+20].copy().reset_index(drop=True) +features = extract_features(window, config['features'], ci_column='FitData', + season_anchor_day=harvest_day+1, lookback_start=harvest_day+1) + +if doy_idx >= 0: + doy_values = (features[:, doy_idx] * 450).astype(int) # Denormalize + print(f"Window size: {len(window)} days (starting at day {harvest_day})") + print(f"DOY values: {doy_values[:10]}") + print(f"Expected: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] (fresh season)") + assert list(doy_values[:10]) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "DOY not reset after harvest!" + print("✓ DOY reset correctly for new season\n") + +print("=" * 80) +print("ALL TESTS PASSED! DOY logic is correct.") +print("=" * 80) diff --git a/python_app/harvest_detection_experiments/tests/test_feature_extraction.py b/python_app/harvest_detection_experiments/tests/test_feature_extraction.py new file mode 100644 index 0000000..bd75ffc --- /dev/null +++ b/python_app/harvest_detection_experiments/tests/test_feature_extraction.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +""" +Quick test: Verify feature extraction works +""" + +import sys +import pandas as pd +import numpy as np +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +from harvest_date_pred_utils import extract_features, load_model_and_config + +project_name = "angata" +base_storage = Path("../laravel_app/storage/app") / project_name / "Data" +CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv" + +print("="*80) +print("DEBUG: Feature Extraction Test") +print("="*80) + +# Load model config +print("\n[1] Loading model config...") +model, config, scalers = load_model_and_config(Path(".")) +print(f" Config features: {config['features']}") +print(f" Number of features: {len(config['features'])}") + +# Load CI data +print("\n[2] Loading CI data...") +ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str}) +ci_data['Date'] = pd.to_datetime(ci_data['Date']) +print(f" Columns: {ci_data.columns.tolist()}") +print(f" Total rows: {len(ci_data)}") + +# Test on a single field +test_field = "1" +field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True) +print(f"\n[3] Testing on field {test_field}...") +print(f" Data points: {len(field_data)}") +print(f" Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}") +print(f" Columns in field data: {field_data.columns.tolist()}") +print(f" Sample values:") +print(field_data[['Date', 'value']].head()) + +# Test feature extraction on first 50 days +print(f"\n[4] Extracting features for first 50 days...") +try: + subset = field_data.iloc[:50].copy() + features = extract_features(subset, config['features'], ci_column='value') + print(f" ✓ Success!") + print(f" Feature shape: {features.shape}") + print(f" Expected shape: (50, {len(config['features'])})") + print(f" Feature values sample (first 5 days):") + for i in range(min(5, features.shape[0])): + print(f" Day {i}: {features[i]}") +except Exception as e: + print(f" ✗ Error: {e}") + import traceback + traceback.print_exc() + +print("\n[5] Testing on growing windows...") +try: + for window_size in [10, 20, 30, 50]: + window_data = field_data.iloc[:window_size].copy() + features = extract_features(window_data, config['features'], ci_column='value') + print(f" Window size {window_size}: shape={features.shape}, min={features.min():.4f}, max={features.max():.4f}") +except Exception as e: + print(f" ✗ Error: {e}") + import traceback + traceback.print_exc() + +print("\n✓ Feature extraction test complete") diff --git a/python_app/harvest_detection_experiments/tests/test_growing_window_only.py b/python_app/harvest_detection_experiments/tests/test_growing_window_only.py new file mode 100644 index 0000000..feec06e --- /dev/null +++ b/python_app/harvest_detection_experiments/tests/test_growing_window_only.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +""" +Test ONLY the growing window method (what production actually uses) +Never run model on full sequence - only on expanding windows + +This matches real deployment where data arrives daily +""" + +import sys +import pandas as pd +import numpy as np +import torch +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from harvest_date_pred_utils import ( + load_model_and_config, + extract_features, +) + +project_name = "angata" + +# Find root by walking up until we find laravel_app +script_dir = Path(__file__).parent +root = script_dir +while root != root.parent: # Stop at filesystem root + if (root / "laravel_app").exists(): + break + root = root.parent + +base_storage = root / "laravel_app" / "storage" / "app" / project_name / "Data" +CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv" +MODEL_DIR = root / "python_app" + +print("="*80) +print("GROWING WINDOW METHOD ONLY (Real Production Simulation)") +print("="*80) + +# Load model +print("\n[1] Loading model...") +model, config, scalers = load_model_and_config(MODEL_DIR) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Load CI data +print("\n[2] Loading CI data...") +ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str}) +ci_data['Date'] = pd.to_datetime(ci_data['Date']) + +# Test on field 779 +test_field = "779" +field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True) + +print(f"\n[3] Field {test_field}: {len(field_data)} data points") +print(f" Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}") + +# Simulate growing window (real production) +print(f"\n[4] Simulating growing window (expanding daily)...") + +harvest_dates = [] +current_pos = 0 +consecutive_above = 0 +threshold = 0.3 +consecutive_days = 2 +model_runs = 0 + +while current_pos < len(field_data): + consecutive_above = 0 + found_harvest = False + + for window_end in range(current_pos + 1, len(field_data) + 1): + # Expand window: current_pos to window_end + window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True) + + try: + # Extract features for THIS window only + features = extract_features(window_data, config['features'], ci_column='value') + + # Normalize + features_scaled = features.copy().astype(float) + for fi, scaler in enumerate(scalers): + features_scaled[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten() + + # Run model on expanding window + with torch.no_grad(): + x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device) + imminent_probs, detected_probs = model(x_tensor) + + model_runs += 1 + + # Check LAST timestep only + last_prob = detected_probs[0, -1].item() + last_date = window_data.iloc[-1]['Date'].date() + + # Print every 50th window to track progress + if window_end % 50 == 0 or window_end < 10: + print(f" Window [{current_pos:3d}:{window_end:3d}] ({last_date}): prob={last_prob:.4f}", end="") + if last_prob > threshold: + print(" ✓ ABOVE", end="") + print() + + # Check threshold + if last_prob > threshold: + consecutive_above += 1 + else: + consecutive_above = 0 + + # Harvest detected + if consecutive_above >= consecutive_days: + harvest_idx = current_pos + window_end - consecutive_days + harvest_date = field_data.iloc[harvest_idx]['Date'] + harvest_dates.append((harvest_date, harvest_idx, last_prob)) + print(f"\n ✓ HARVEST DETECTED at {harvest_date.date()} (index {harvest_idx})") + print(f" {consecutive_days} consecutive days above {threshold}") + + # Jump past this harvest + current_pos = harvest_idx + 1 + found_harvest = True + break + + except Exception as e: + print(f" ERROR at window [{current_pos}:{window_end}]: {e}") + continue + + if not found_harvest: + break + +print(f"\n[5] Results:") +print(f" Total model runs: {model_runs}") +print(f" Harvests found: {len(harvest_dates)}") + +if harvest_dates: + print(f"\n Harvest dates:") + for date, idx, prob in harvest_dates: + print(f" {date.date()}: index {idx}, last_prob={prob:.4f}") +else: + print(f"\n No harvests detected") + +print(f"\n[6] Analysis:") +print(f" Model runs per day: {model_runs / len(field_data):.2f}x") +print(f" Expected: ~{len(field_data):.0f} runs (one per day)") diff --git a/python_app/harvest_detection_experiments/tests/test_model_inference.py b/python_app/harvest_detection_experiments/tests/test_model_inference.py new file mode 100644 index 0000000..e98d99c --- /dev/null +++ b/python_app/harvest_detection_experiments/tests/test_model_inference.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +""" +Complete test: Feature extraction + Model inference + Phase 1 detection +""" + +import sys +import pandas as pd +import numpy as np +import torch +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +from harvest_date_pred_utils import ( + load_model_and_config, + extract_features, + run_phase1_growing_window +) + +project_name = "angata" +base_storage = Path("../laravel_app/storage/app") / project_name / "Data" +CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv" + +print("="*80) +print("DEBUG: Model Inference + Phase 1 Detection") +print("="*80) + +# Load model +print("\n[1] Loading model...") +model, config, scalers = load_model_and_config(Path(".")) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f" Device: {device}") +print(f" Scalers type: {type(scalers)}") +print(f" Number of scalers: {len(scalers) if isinstance(scalers, list) else 'N/A (dict/object)'}") + +# Load CI data +print("\n[2] Loading CI data...") +ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str}) +ci_data['Date'] = pd.to_datetime(ci_data['Date']) + +# Test on a known field (field 1) +test_field = "1" +field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True) +print(f"\n[3] Testing on field {test_field}...") +print(f" Data points: {len(field_data)}") + +# Test with first 100 days +subset_100 = field_data.iloc[:100].copy().reset_index(drop=True) +print(f"\n[4] Testing model inference on first 100 days...") + +try: + features = extract_features(subset_100, config['features'], ci_column='value') + print(f" Features shape: {features.shape}") + print(f" Features dtype: {features.dtype}") + + # Apply scalers + features_scaled = features.copy().astype(float) + print(f" Applying {len(scalers)} scalers...") + + for fi, scaler in enumerate(scalers): + try: + col_data = features[:, fi].reshape(-1, 1) + scaled_col = scaler.transform(col_data) + features_scaled[:, fi] = scaled_col.flatten() + if fi < 3: # Show first 3 scalers + print(f" Scaler {fi}: transformed {features[0, fi]:.4f} → {features_scaled[0, fi]:.4f}") + except Exception as e: + print(f" ERROR in scaler {fi}: {e}") + raise + + # Run model + print(f"\n Running model inference...") + x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device) + print(f" Tensor shape: {x_tensor.shape}") + + with torch.no_grad(): + imminent_probs, detected_probs = model(x_tensor) + + print(f" Imminent probs shape: {imminent_probs.shape}") + print(f" Detected probs shape: {detected_probs.shape}") + print(f" Detected probs dtype: {detected_probs.dtype}") + + # Analyze detected probs + detected_np = detected_probs[0].cpu().numpy() # Get first (only) batch + print(f"\n Detected head analysis:") + print(f" Min: {detected_np.min():.4f}") + print(f" Max: {detected_np.max():.4f}") + print(f" Mean: {detected_np.mean():.4f}") + print(f" Median: {np.median(detected_np):.4f}") + print(f" > 0.1: {(detected_np > 0.1).sum()} days") + print(f" > 0.3: {(detected_np > 0.3).sum()} days") + print(f" > 0.5: {(detected_np > 0.5).sum()} days") + + # Show top 5 peaks + top_indices = np.argsort(detected_np)[-5:][::-1] + print(f"\n Top 5 detected peaks:") + for idx in top_indices: + date = subset_100.iloc[idx]['Date'].date() + prob = detected_np[idx] + print(f" Day {idx} ({date}): {prob:.4f}") + +except Exception as e: + print(f" ERROR: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + +# Test Phase 1 growing window +print(f"\n[5] Testing Phase 1 growing window (threshold=0.3, consecutive=2)...") +try: + phase1_results = run_phase1_growing_window( + subset_100, model, config, scalers, 'value', device, + threshold=0.3, consecutive_days=2 + ) + print(f" ✓ Phase 1 found {len(phase1_results)} harvest(s):") + for harvest_date, harvest_idx in phase1_results: + print(f" {harvest_date.date()}: index {harvest_idx}") +except Exception as e: + print(f" ERROR: {e}") + import traceback + traceback.print_exc() + +print("\n✓ Model inference test complete") diff --git a/python_app/harvest_detection_experiments/tests/test_script22_debug.py b/python_app/harvest_detection_experiments/tests/test_script22_debug.py new file mode 100644 index 0000000..85186b1 --- /dev/null +++ b/python_app/harvest_detection_experiments/tests/test_script22_debug.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +""" +Debug script: Test if script 22 logic is working +Tests the two-step refinement on a single known field +""" + +import sys +import time +import pandas as pd +import numpy as np +import torch +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from harvest_date_pred_utils import ( + load_model_and_config, + extract_features, + run_phase1_growing_window, +) + +project_name = "angata" + +# Find the workspace root by looking for laravel_app folder +script_dir = Path(__file__).parent +root = script_dir +while root != root.parent: + if (root / "laravel_app").exists(): + break + root = root.parent + +base_storage = root / "laravel_app" / "storage" / "app" / project_name / "Data" +CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv" +MODEL_DIR = root / "python_app" + +print("="*80) +print("DEBUG: Script 22 Two-Step Refinement Logic") +print("="*80) + +# Load model +print("\n[1] Loading model...") +model, config, scalers = load_model_and_config(MODEL_DIR) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f" Device: {device}") +print(f" Model features: {config['features']}") + +# Load CI data +print("\n[2] Loading CI data...") +ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str}) +ci_data['Date'] = pd.to_datetime(ci_data['Date']) +print(f" Total rows: {len(ci_data)}") +print(f" Fields: {ci_data['field'].nunique()}") +print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}") + +# Test on a known field (field 779 from our previous tests) +test_field = "779" +field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True) + +print(f"\n[3] Testing on field {test_field}...") +print(f" Data points: {len(field_data)}") +print(f" Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}") + +if len(field_data) == 0: + print(f" ERROR: No data for field {test_field}") + sys.exit(1) + +# Extract features +print(f"\n[4] Extracting features for field {test_field}...") +try: + features = extract_features(field_data.reset_index(drop=True), config['features'], ci_column='value') + print(f" Features shape: {features.shape}") + print(f" Features dtype: {features.dtype}") +except Exception as e: + print(f" ERROR: Could not extract features: {e}") + sys.exit(1) + +# Normalize and run model +print(f"\n[5] Running Phase 1 GROWING WINDOW method (threshold=0.5, consecutive=3)...") +print(f" This simulates real production: expanding windows, checking each day") +print(f" Expected: ~477 model runs for 477 days (SLOW)") + +import time +start_time = time.time() + +# Add instrumentation to see how many model runs are happening +original_run = run_phase1_growing_window + +def instrumented_run(*args, **kwargs): + import sys + from harvest_date_pred_utils import extract_features + + field_data = args[0] + model = args[1] + config = args[2] + scalers = args[3] + ci_column = args[4] + device = args[5] + threshold = kwargs.get('threshold', 0.3) + consecutive_days = kwargs.get('consecutive_days', 2) + + harvest_dates = [] + current_pos = 0 + model_runs = 0 + + print(f" Starting growing window loop...") + + while current_pos < len(field_data): + consecutive_above_threshold = 0 + loop_start = current_pos + + for window_end in range(current_pos + 1, len(field_data) + 1): + window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True) + + try: + features = extract_features(window_data, config['features'], ci_column=ci_column) + + features_scaled = features.copy().astype(float) + for fi, scaler in enumerate(scalers): + try: + features_scaled[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten() + except Exception as e: + raise ValueError(f"Scaler {fi} failed: {e}") + + import torch + with torch.no_grad(): + x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device) + imminent_probs, detected_probs = model(x_tensor) + + model_runs += 1 + last_prob = detected_probs[0, -1].item() + + if last_prob > threshold: + consecutive_above_threshold += 1 + else: + consecutive_above_threshold = 0 + + if consecutive_above_threshold >= consecutive_days: + harvest_date = field_data.iloc[current_pos + window_end - consecutive_days]['Date'] + harvest_dates.append((harvest_date, current_pos + window_end - consecutive_days)) + current_pos = current_pos + window_end - consecutive_days + 1 + break + + except Exception as e: + pass + else: + break + + print(f" Model runs performed: {model_runs}") + return harvest_dates + +phase1_results = instrumented_run( + field_data.reset_index(drop=True), + model, config, scalers, 'value', device, + threshold=0.5, + consecutive_days=3 +) + +elapsed = time.time() - start_time +print(f"\n Time elapsed: {elapsed:.2f}s") + +if phase1_results: + print(f" ✓ Phase 1 detected {len(phase1_results)} harvest(s):") + + # Get probabilities for display by running model once on full field + with torch.no_grad(): + X = features.reshape(1, -1, len(config['features'])) + X_normalized = np.zeros_like(X) + for fi, scaler in enumerate(scalers): + X_normalized[0, :, fi] = scaler.transform(X[0, :, fi].reshape(-1, 1)).flatten() + X_tensor = torch.from_numpy(X_normalized).float().to(device) + _, detected_probs = model(X_tensor) + detected_np = detected_probs[0].cpu().numpy() + + for harvest_date, harvest_idx in phase1_results: + prob = detected_np[harvest_idx] if harvest_idx < len(detected_np) else 0.0 + print(f" {harvest_date.date()}: index {harvest_idx}, probability={prob:.4f}") +else: + print(f" ✗ Phase 1: No harvest detected") diff --git a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R index 7b7ab23..f1d6b91 100644 --- a/r_app/40_mosaic_creation.R +++ b/r_app/40_mosaic_creation.R @@ -14,8 +14,8 @@ # - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE) # # Examples: -# Rscript 04_mosaic_creation.R 2025-12-21 7 angata -# Rscript 04_mosaic_creation.R 2025-12-21 7 angata week_51.tif TRUE 5 [tile-based] + +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata # # 1. Load required packages @@ -142,15 +142,35 @@ main <- function() { if (use_tile_mosaic) { # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics # This is used for projects like Angata with large ROIs requiring spatial partitioning + # Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20) tryCatch({ safe_log("Starting per-tile mosaic creation (tile-based approach)...") - # Set output directory for per-tile mosaics - tile_output_base <- file.path(laravel_storage, "weekly_tile_max") + # Detect grid size from merged_final_tif folder structure + # Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc. + merged_final_base <- file.path(laravel_storage, "merged_final_tif") + grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE) + # Look for grid size patterns like "5x5", "10x10", "20x20" + grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) + + if (length(grid_patterns) == 0) { + stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)") + } + + grid_size <- grid_patterns[1] # Use first grid size found + safe_log(paste("Detected grid size:", grid_size)) + + # Point to the grid-specific merged_final_tif directory + merged_final_with_grid <- file.path(merged_final_base, grid_size) + + # Set output directory for per-tile mosaics, organized by grid size + # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif + tile_output_base <- file.path(laravel_storage, "weekly_tile_max", grid_size) + dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE) created_tile_files <- create_weekly_mosaic_from_tiles( dates = dates, - merged_final_dir = merged_final, + merged_final_dir = merged_final_with_grid, tile_output_dir = tile_output_base, field_boundaries = field_boundaries ) diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R index 60e2f26..dc7b778 100644 --- a/r_app/40_mosaic_creation_utils.R +++ b/r_app/40_mosaic_creation_utils.R @@ -354,18 +354,17 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU } # Get filenames of best-coverage images - # Match by extracting tile ID from both cloud stats and TIF filenames + # Match by full filename from cloud stats to TIF files rasters_to_use <- character() for (idx in best_coverage) { - # Extract tile ID from cloud_coverage_stats filename (e.g., "tile_18" → 18) + # Get the full filename from cloud coverage stats cc_filename <- cloud_coverage_stats$filename[idx] - cc_tile_id <- gsub(".*_([0-9]+).*", "\\1", cc_filename) - # Find matching TIF file by matching tile ID + # Find matching TIF file by full filename matching_tif <- NULL for (tif_file in tif_files) { - tif_tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", basename(tif_file)) - if (tif_tile_id == cc_tile_id) { + tif_basename <- basename(tif_file) + if (tif_basename == cc_filename) { matching_tif <- tif_file break } @@ -373,6 +372,8 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU if (!is.null(matching_tif)) { rasters_to_use <- c(rasters_to_use, matching_tif) + } else { + safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", cc_filename), "WARNING") } } @@ -420,42 +421,60 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU mosaic <- tryCatch({ safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds")) - # Get extent from field boundaries if available, otherwise use raster intersection - if (!is.null(field_boundaries)) { - crop_extent <- terra::ext(field_boundaries) - safe_log("Using field boundaries extent for consistent area across all dates") + # Check if all rasters have identical grids (extent and resolution) + # This is likely for per-tile mosaics from the same tiling scheme + reference_raster <- all_rasters[[1]] + ref_ext <- terra::ext(reference_raster) + ref_res <- terra::res(reference_raster) + + grids_match <- all(sapply(all_rasters[-1], function(r) { + isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) && + isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6)) + })) + + if (grids_match) { + # All rasters have matching grids - no cropping/resampling needed! + safe_log("All rasters have identical grids - stacking directly for max composite") + raster_collection <- terra::sprc(all_rasters) + max_mosaic <- terra::mosaic(raster_collection, fun = "max") } else { - # Fallback: use intersection of all raster extents - crop_extent <- terra::ext(all_rasters[[1]]) - for (i in 2:length(all_rasters)) { - crop_extent <- terra::intersect(crop_extent, terra::ext(all_rasters[[i]])) + # Grids don't match - need to crop and resample + safe_log("Rasters have different grids - cropping and resampling to common extent") + + # Get extent from field boundaries if available, otherwise use raster union + if (!is.null(field_boundaries)) { + crop_extent <- terra::ext(field_boundaries) + safe_log("Using field boundaries extent for consistent area across all dates") + } else { + # Use union of all extents (covers all data) + crop_extent <- terra::ext(all_rasters[[1]]) + for (i in 2:length(all_rasters)) { + crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]])) + } + safe_log("Using raster union extent") } - safe_log("Using raster intersection extent") + + # Crop all rasters to common extent + cropped_rasters <- lapply(all_rasters, function(r) { + terra::crop(r, crop_extent) + }) + + # Resample all cropped rasters to match the first one's grid + reference_grid <- cropped_rasters[[1]] + + aligned_rasters <- lapply(cropped_rasters, function(r) { + if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) && + isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) { + return(r) # Already aligned + } + terra::resample(r, reference_grid, method = "near") + }) + + # Create max composite using mosaic on aligned rasters + raster_collection <- terra::sprc(aligned_rasters) + max_mosaic <- terra::mosaic(raster_collection, fun = "max") } - # Crop all rasters to common extent - cropped_rasters <- lapply(all_rasters, function(r) { - terra::crop(r, crop_extent) - }) - - # Resample all cropped rasters to match the first one's grid - # This handles pixel grid misalignment from Python's dynamic extent adjustment - reference_grid <- cropped_rasters[[1]] - safe_log("Resampling rasters to common grid for stacking") - - aligned_rasters <- lapply(cropped_rasters, function(r) { - if (identical(terra::ext(r), terra::ext(reference_grid)) && - identical(terra::res(r), terra::res(reference_grid))) { - return(r) # Already aligned - } - terra::resample(r, reference_grid, method = "near") - }) - - # Create max composite using mosaic on aligned rasters - # Resample ensures all rasters have matching grids (no resolution mismatch) - raster_collection <- terra::sprc(aligned_rasters) - max_mosaic <- terra::mosaic(raster_collection, fun = "max") - max_mosaic }, error = function(e) { safe_log(paste("Max composite creation failed:", e$message), "WARNING") @@ -686,6 +705,16 @@ create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output next } + # DEBUG: Check mosaic content before saving + safe_log(paste(" DEBUG: Mosaic tile", tile_id, "dimensions:", nrow(tile_mosaic), "x", ncol(tile_mosaic))) + safe_log(paste(" DEBUG: Mosaic tile", tile_id, "bands:", terra::nlyr(tile_mosaic))) + + # Check first band values + band1 <- tile_mosaic[[1]] + band1_min <- terra::global(band1, fun = "min", na.rm = TRUE)$min + band1_max <- terra::global(band1, fun = "max", na.rm = TRUE)$max + safe_log(paste(" DEBUG: Band 1 MIN=", round(band1_min, 2), "MAX=", round(band1_max, 2))) + # Step 2c: Save this tile's weekly MAX mosaic # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif for week 2, 2026, tile 1) tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_", @@ -763,7 +792,7 @@ count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) { missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) aggregated_results[[idx]] <- data.frame( - filename = paste0("tile_", sprintf("%02d", as.integer(gsub(".*_([0-9]+)\\.tif", "\\1", basename(tile_file))))), + filename = basename(tile_file), # Keep full filename: 2026-01-07_03.tif notNA = total_notna, total_pixels = total_pixels, missing_pixels_percentage = missing_pct, diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 6977491..59a9195 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -20,6 +20,8 @@ # Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7 # Arguments: [end_date] [project_dir] [offset_days] # +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7 +# # Usage in run_full_pipeline.R: # source("r_app/80_calculate_kpis.R") # main() @@ -122,7 +124,7 @@ STATUS_TRIGGERS <- data.frame( # TILE-AWARE HELPER FUNCTIONS # ============================================================================ -get_tile_ids_for_field <- function(field_geom, tile_grid) { +get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { if (inherits(field_geom, "sf")) { field_bbox <- sf::st_bbox(field_geom) field_xmin <- field_bbox["xmin"] @@ -139,6 +141,17 @@ get_tile_ids_for_field <- function(field_geom, tile_grid) { stop("field_geom must be sf or terra::vect object") } + # DEBUG: Print bbox info for first field + if (!is.null(field_id) && field_id == "1391") { + message(paste("[DEBUG get_tile_ids] Field bbox - xmin:", field_xmin, "xmax:", field_xmax, + "ymin:", field_ymin, "ymax:", field_ymax)) + message(paste("[DEBUG get_tile_ids] tile_grid sample: id=", tile_grid$id[1], + "xmin=", tile_grid$xmin[1], "xmax=", tile_grid$xmax[1], + "ymin=", tile_grid$ymin[1], "ymax=", tile_grid$ymax[1])) + message(paste("[DEBUG get_tile_ids] tile_grid CRS:", sf::st_crs(tile_grid))) + message(paste("[DEBUG get_tile_ids] field CRS:", sf::st_crs(field_geom))) + } + intersecting_tiles <- tile_grid$id[ !(tile_grid$xmax < field_xmin | tile_grid$xmin > field_xmax | @@ -189,6 +202,21 @@ load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_di } build_tile_grid <- function(mosaic_dir, week_num, year) { + # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/) + # First check if mosaic_dir contains grid-size subdirectories + detected_grid_size <- NA + if (dir.exists(mosaic_dir)) { + subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + + if (length(grid_patterns) > 0) { + # Use the first grid-size subdirectory found + detected_grid_size <- grid_patterns[1] + mosaic_dir <- file.path(mosaic_dir, detected_grid_size) + message(paste(" Using grid-size subdirectory:", detected_grid_size)) + } + } + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) @@ -230,7 +258,12 @@ build_tile_grid <- function(mosaic_dir, week_num, year) { stop("Could not extract extents from any tile files") } - return(tile_grid) + # RETURN BOTH the grid AND the corrected mosaic directory path + return(list( + tile_grid = tile_grid, + mosaic_dir = mosaic_dir, + grid_size = detected_grid_size + )) } # ============================================================================ @@ -399,14 +432,20 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n USE_UNIFORM_AGE <- TRUE UNIFORM_PLANTING_DATE <- as.Date("2025-01-01") -extract_planting_dates <- function(harvesting_data) { +extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { if (USE_UNIFORM_AGE) { message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) - return(data.frame( - field_id = character(), - planting_date = as.Date(character()), - stringsAsFactors = FALSE - )) + # Return a data frame with all field IDs mapped to uniform planting date + if (!is.null(field_boundaries_sf)) { + return(data.frame( + field_id = field_boundaries_sf$field, + date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)), + stringsAsFactors = FALSE + )) + } else { + # Fallback if field_boundaries_sf not provided + return(NULL) + } } if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { @@ -449,6 +488,11 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week } field_name <- field_id + # DEBUG: Print for first few fields + if (field_idx <= 3) { + message(paste("[DEBUG] Field", field_idx, ":", field_id)) + } + field_sf <- field_boundaries_sf[field_idx, ] if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) { return(data.frame( @@ -460,7 +504,14 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 field_area_acres <- field_area_ha / 0.404686 - tile_ids <- get_tile_ids_for_field(field_sf, tile_grid) + tile_ids <- get_tile_ids_for_field(field_sf, tile_grid, field_id = field_id) + + # DEBUG: Print tile IDs for first field + if (field_idx == 1) { + message(paste("[DEBUG] First field tile_ids:", paste(tile_ids, collapse=","))) + message(paste("[DEBUG] tile_grid nrows:", nrow(tile_grid), "ncols:", ncol(tile_grid))) + message(paste("[DEBUG] mosaic_dir:", mosaic_dir)) + } current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir) @@ -471,10 +522,26 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week )) } - field_vect <- terra::vect(sf::as_Spatial(field_sf)) - terra::crs(field_vect) <- terra::crs(current_ci) + # Extract CI values: EXACTLY LIKE SCRIPT 20 + # Crop to field bounding box first, then extract with sf directly (not terra::vect conversion) + field_bbox <- sf::st_bbox(field_sf) + ci_cropped <- terra::crop(current_ci, terra::ext(field_bbox), snap = "out") + extracted_vals <- terra::extract(ci_cropped, field_sf, fun = "mean", na.rm = TRUE) - all_extracted <- terra::extract(current_ci, field_vect)[, 2] + # extracted_vals is a data.frame with ID column (field index) + mean value + mean_ci_current <- as.numeric(extracted_vals[1, 2]) + + if (is.na(mean_ci_current)) { + return(data.frame( + Field_id = field_id, + error = "No CI values extracted from tiles" + )) + } + + # For per-tile extraction, we only have mean from the aggregation function + # To get variance/CV, we need to extract all pixels without the fun parameter + # But for farm-level purposes, the mean CI is sufficient + all_extracted <- terra::extract(ci_cropped, field_sf)[, 2] current_ci_vals <- all_extracted[!is.na(all_extracted)] num_total <- length(all_extracted) @@ -509,7 +576,9 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week tryCatch({ previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir) if (!is.null(previous_ci)) { - prev_extracted <- terra::extract(previous_ci, field_vect)[, 2] + prev_bbox <- sf::st_bbox(field_sf) + prev_ci_cropped <- terra::crop(previous_ci, terra::ext(prev_bbox), snap = "out") + prev_extracted <- terra::extract(prev_ci_cropped, field_sf)[, 2] previous_ci_vals <- prev_extracted[!is.na(prev_extracted)] if (length(previous_ci_vals) > 0) { mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE) @@ -763,6 +832,13 @@ generate_field_analysis_summary <- function(field_df) { export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { message("Exporting per-field analysis to Excel, CSV, and RDS...") + # Round all numeric columns to 2 decimals + field_df_rounded <- field_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + + summary_df_rounded <- summary_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + output_subdir <- file.path(reports_dir, "kpis", "field_analysis") if (!dir.exists(output_subdir)) { dir.create(output_subdir, recursive = TRUE) @@ -773,16 +849,16 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) sheets <- list( - "Field Data" = field_df, - "Summary" = summary_df + "Field Data" = field_df_rounded, + "Summary" = summary_df_rounded ) write_xlsx(sheets, excel_path) message(paste("✓ Field analysis Excel exported to:", excel_path)) kpi_data <- list( - field_analysis = field_df, - field_analysis_summary = summary_df, + field_analysis = field_df_rounded, + field_analysis_summary = summary_df_rounded, metadata = list( current_week = current_week, project = project_dir, @@ -798,84 +874,254 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") csv_path <- file.path(output_subdir, csv_filename) - write_csv(field_df, csv_path) + write_csv(field_df_rounded, csv_path) message(paste("✓ Field analysis CSV exported to:", csv_path)) return(list(excel = excel_path, rds = rds_path, csv = csv_path)) } # ============================================================================ -# FARM-LEVEL KPI CALCULATION (FROM OLD 09_CALCULATE_KPIS.R) +# TILE-BASED KPI EXTRACTION FUNCTION # ============================================================================ +calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) { + # Loop through tiles, extract KPI statistics per field per tile + # Follows the same pattern as extract_ci_from_tiles in CI extraction + + message("Calculating field-level KPI statistics from tiles...") + + # Get all tile files for this week + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) + tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + message("No tiles found for week", week_num, year) + return(NULL) + } + + # Process tiles in parallel using furrr (same as CI extraction) + message(paste("Processing", length(tile_files), "tiles in parallel...")) + + field_kpi_list <- furrr::future_map( + tile_files, + ~ process_single_kpi_tile( + tile_file = ., + field_boundaries_sf = field_boundaries_sf, + tile_grid = tile_grid + ), + .progress = TRUE, + .options = furrr::furrr_options(seed = TRUE) + ) + + # Combine results from all tiles + field_kpi_stats <- dplyr::bind_rows(field_kpi_list) + + if (nrow(field_kpi_stats) == 0) { + message(" No KPI data extracted from tiles") + return(NULL) + } + + message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields")) + return(field_kpi_stats) +} + +# Helper function to process a single tile (like process_single_tile in CI extraction) +process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) { + tryCatch({ + tile_basename <- basename(tile_file) + # Load tile raster + tile_raster <- terra::rast(tile_file) + + # Get first band (CI band for weekly mosaics) + ci_band <- tile_raster[[1]] + + # EXACTLY LIKE SCRIPT 20: Crop to field bounding box first, then extract with sf directly + field_bbox <- sf::st_bbox(field_boundaries_sf) + ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out") + + # Extract CI values for ALL fields at once using sf object directly (NOT terra::vect) + # terra::extract() works with sf objects and handles geometries properly + extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE) + + # Initialize results for this tile + tile_results <- data.frame() + + # Get tile ID from filename + tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename)) + + # Process each field: extracted_vals is a data.frame with ID column (field indices) + extracted values + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + field_id <- field_boundaries_sf$field[field_idx] + + # extracted_vals columns: 1=ID, 2=mean_CI (since we used fun="mean") + mean_ci <- extracted_vals[field_idx, 2] + + # Skip if no data for this field in this tile + if (is.na(mean_ci)) { + next + } + + # For tile-level stats, we only have mean from extraction (no variance without all pixels) + # Add to results + tile_results <- rbind(tile_results, data.frame( + field = field_id, + tile_id = tile_id_match, + tile_file = tile_basename, + mean_ci = round(mean_ci, 4), + stringsAsFactors = FALSE + )) + } + + return(tile_results) + + }, error = function(e) { + message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message)) + return(data.frame()) + }) +} + calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir, - weekly_CI_mosaic, reports_dir) { + weekly_CI_mosaic, reports_dir, current_week, year, + tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") { message("\n=== CALCULATING FARM-LEVEL KPIs ===") - message("(6 high-level KPI metrics)") - - tryCatch({ - source(here("r_app", "kpi_utils.R")) - }, error = function(e) { - message(paste("Warning: Could not load kpi_utils.R:", e$message)) - message("Farm-level KPIs will be skipped") - return(NULL) - }) - - if (!exists("calculate_all_kpis")) { - message("Warning: calculate_all_kpis() function not found in kpi_utils.R") - return(NULL) - } + message("(6 high-level KPI metrics with tile-based extraction)") output_dir <- file.path(reports_dir, "kpis") if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE) } - tryCatch({ - kpi_results <- calculate_all_kpis( - report_date = report_date, - output_dir = output_dir, - field_boundaries_sf = field_boundaries_sf, - harvesting_data = harvesting_data, - cumulative_CI_vals_dir = cumulative_CI_vals_dir, - weekly_CI_mosaic = weekly_CI_mosaic, - reports_dir = reports_dir, - project_dir = project_dir - ) - - # Print KPI summary - cat("\n=== FARM-LEVEL KPI SUMMARY ===\n") - cat("Report Date:", as.character(kpi_results$metadata$report_date), "\n") - cat("Current Week:", kpi_results$metadata$current_week, "\n") - cat("Previous Week:", kpi_results$metadata$previous_week, "\n") - cat("Total Fields Analyzed:", kpi_results$metadata$total_fields, "\n") - cat("Calculation Time:", as.character(kpi_results$metadata$calculation_time), "\n") - - cat("\n--- KPI Metrics ---\n") - cat("Field Uniformity Summary:\n") - print(kpi_results$field_uniformity_summary) - - cat("\nArea Change Summary:\n") - print(kpi_results$area_change) - - cat("\nTCH Forecasted:\n") - print(kpi_results$tch_forecasted) - - cat("\nGrowth Decline Index:\n") - print(kpi_results$growth_decline) - - cat("\nWeed Presence Score:\n") - print(kpi_results$weed_presence) - - cat("\nGap Filling Score:\n") - print(kpi_results$gap_filling) - - return(kpi_results) - }, error = function(e) { - message(paste("Error calculating farm-level KPIs:", e$message)) + # Get mosaic directory with grid size if using tiles + mosaic_dir <- if (use_tile_mosaic && !is.null(tile_grid_size)) { + file.path(weekly_CI_mosaic, tile_grid_size) + } else { + weekly_CI_mosaic + } + + # Extract field-level KPI statistics from tiles + field_kpi_stats <- calculate_field_kpis_from_tiles( + tile_dir = mosaic_dir, + week_num = current_week, + year = year, + field_boundaries_sf = field_boundaries_sf, + tile_grid = tile_grid + ) + + if (is.null(field_kpi_stats) || nrow(field_kpi_stats) == 0) { + message("Warning: No field KPI statistics extracted from tiles") return(NULL) - }) + } + + # Aggregate tile-based statistics by field (average across tiles for each field) + field_summary_stats <- field_kpi_stats %>% + dplyr::group_by(field) %>% + dplyr::summarise( + mean_ci = mean(mean_ci, na.rm = TRUE), + cv_ci = mean(cv_ci, na.rm = TRUE), + min_ci = min(min_ci, na.rm = TRUE), + max_ci = max(max_ci, na.rm = TRUE), + total_pixels = sum(n_pixels, na.rm = TRUE), + num_tiles = n_distinct(tile_id), + .groups = 'drop' + ) + + # Create results list + kpi_results <- list( + field_kpi_stats = field_kpi_stats, + field_summary_stats = field_summary_stats, + metadata = list( + report_date = report_date, + current_week = current_week, + year = year, + calculation_method = "tile_based_extraction", + num_fields_processed = length(unique(field_kpi_stats$field)), + num_tiles_processed = length(unique(field_kpi_stats$tile_id)) + ) + ) + + # Save results + rds_filename <- paste0(project_dir, "_farm_kpi_stats_week", sprintf("%02d", current_week), ".rds") + rds_path <- file.path(output_dir, rds_filename) + saveRDS(kpi_results, rds_path) + message(paste("✓ Farm-level KPI stats exported to:", rds_path)) + + # Print summary + cat("\n=== FARM-LEVEL KPI SUMMARY ===\n") + cat("Report Date:", as.character(report_date), "\n") + cat("Week:", current_week, "Year:", year, "\n") + cat("Fields Processed:", length(unique(field_kpi_stats$field)), "\n") + cat("Tiles Processed:", length(unique(field_kpi_stats$tile_id)), "\n") + cat("\n--- Field Summary Statistics (Mean across tiles) ---\n") + print(head(field_summary_stats, 20)) + + return(kpi_results) +} + +# ============================================================================ +# HELPER: Extract field-level statistics from CI raster (all pixels, single call) +# ============================================================================ + +extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { + #' Extract CI statistics for all fields from a single CI raster band + #' + #' This function extracts all pixel values for each field in one terra::extract call, + #' then calculates mean, CV, and percentiles from those pixels. + #' + #' @param ci_band Single CI band from terra raster + #' @param field_boundaries_sf SF object with field geometries + #' @return Data frame with columns: field_idx, mean_ci, cv, p10, p90, pixel_count + + # Extract all pixels for all fields at once (more efficient than individual calls) + all_pixels <- terra::extract(ci_band, field_boundaries_sf) + + # Calculate statistics for each field + stats_list <- list() + + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + # Extract pixel values for this field (skip ID column 1) + pixels <- all_pixels[field_idx, -1, drop = TRUE] + pixels <- as.numeric(pixels) + pixels <- pixels[!is.na(pixels)] + + # Only calculate stats if we have pixels + if (length(pixels) > 0) { + mean_val <- mean(pixels, na.rm = TRUE) + + # Only calculate CV if mean > 0 (avoid division by zero) + if (mean_val > 0) { + cv_val <- sd(pixels, na.rm = TRUE) / mean_val + } else { + cv_val <- NA + } + + p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]] + p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]] + + stats_list[[field_idx]] <- data.frame( + field_idx = field_idx, + mean_ci = mean_val, + cv = cv_val, + p10 = p10_val, + p90 = p90_val, + pixel_count = length(pixels), + stringsAsFactors = FALSE + ) + } else { + # No pixels for this field (doesn't intersect tile) + stats_list[[field_idx]] <- data.frame( + field_idx = field_idx, + mean_ci = NA_real_, + cv = NA_real_, + p10 = NA_real_, + p90 = NA_real_, + pixel_count = 0, + stringsAsFactors = FALSE + ) + } + } + + return(dplyr::bind_rows(stats_list)) } # ============================================================================ @@ -923,7 +1169,7 @@ main <- function() { message("") # Load configuration and utilities - source(here("r_app", "crop_messaging_utils.R")) + # source(here("r_app", "crop_messaging_utils.R")) tryCatch({ source(here("r_app", "parameters_project.R")) @@ -950,10 +1196,29 @@ main <- function() { message(paste("Week:", current_week, "/ Year:", year)) - message("Building tile grid from available weekly tiles...") - tile_grid <- build_tile_grid(weekly_tile_max, current_week, year) - message(paste(" Found", nrow(tile_grid), "tiles")) + # Find tile files - approach from Script 20 + message("Finding tile files...") + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year) + # Detect grid size subdirectory + detected_grid_size <- NA + if (dir.exists(weekly_tile_max)) { + subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + if (length(grid_patterns) > 0) { + detected_grid_size <- grid_patterns[1] + mosaic_dir <- file.path(weekly_tile_max, detected_grid_size) + message(paste(" Using grid-size subdirectory:", detected_grid_size)) + } + } + + tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) + if (length(tile_files) == 0) { + stop(paste("No tile files found for week", current_week, year, "in", mosaic_dir)) + } + message(paste(" Found", length(tile_files), "tiles")) + + # Load field boundaries tryCatch({ boundaries_result <- load_field_boundaries(data_dir) @@ -979,38 +1244,217 @@ main <- function() { } historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, num_weeks = num_weeks_to_load) - planting_dates <- extract_planting_dates(harvesting_data) + planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) - message("Setting up parallel processing...") - current_plan <- class(future::plan())[1] - if (current_plan == "sequential") { - num_workers <- parallel::detectCores() - 1 - message(paste(" Using", num_workers, "workers")) - future::plan(future::multisession, workers = num_workers) + # Validate planting_dates + if (is.null(planting_dates) || nrow(planting_dates) == 0) { + message("WARNING: No planting dates available. Using NA for all fields.") + planting_dates <- data.frame( + field_id = field_boundaries_sf$field, + date = rep(as.Date(NA), nrow(field_boundaries_sf)), + stringsAsFactors = FALSE + ) } - message("Analyzing", nrow(field_boundaries_sf), "fields in parallel...") + # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile + message("\nProcessing tiles and extracting field statistics...") + all_tile_results <- list() - field_analysis_list <- furrr::future_map( - seq_len(nrow(field_boundaries_sf)), - ~ analyze_single_field( - field_idx = ., - field_boundaries_sf = field_boundaries_sf, - tile_grid = tile_grid, - week_num = current_week, - year = year, - mosaic_dir = weekly_tile_max, - historical_data = historical_data, - planting_dates = planting_dates, - report_date = end_date, - harvest_imminence_data = NULL, - harvesting_data = harvesting_data - ), - .progress = TRUE, - .options = furrr::furrr_options(seed = TRUE) - ) + for (i in seq_along(tile_files)) { + tile_file <- tile_files[i] + message(paste(" Processing tile", i, "of", length(tile_files), ":", basename(tile_file))) + + tryCatch({ + # Load current tile and previous week tile + current_rast <- terra::rast(tile_file) + + # DEBUG: Check tile structure on first tile + if (i == 1) { + message(paste(" [DEBUG] Tile CRS:", terra::crs(current_rast))) + message(paste(" [DEBUG] Tile extent:", paste(terra::ext(current_rast)))) + message(paste(" [DEBUG] Field boundaries CRS:", sf::st_crs(field_boundaries_sf))) + field_bbox <- sf::st_bbox(field_boundaries_sf) + message(paste(" [DEBUG] Field bbox:", paste(round(field_bbox, 2)))) + message(paste(" [DEBUG] Band names:", paste(names(current_rast), collapse=", "))) + } + + # Extract CI band by name + ci_band <- current_rast[["CI"]] + + # Check if CI band exists - use proper logical checks + if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { + message(paste(" ERROR: CI band not found. Available bands:", paste(names(current_rast), collapse=", "))) + next + } + + # Check if CI band has any valid data + if (tryCatch(all(is.na(values(ci_band))), error = function(e) TRUE)) { + message(paste(" ERROR: CI band has no valid data")) + next + } + + # Load previous week tile if available + previous_tile_file <- sub(sprintf("week_%02d", current_week), + sprintf("week_%02d", previous_week), + tile_file) + previous_ci <- NULL + if (file.exists(previous_tile_file)) { + previous_rast <- terra::rast(previous_tile_file) + previous_ci <- previous_rast[["CI"]] + } + + # OPTION 1 + 2: Extract all CI statistics from one pixel extraction (single call) + current_stats <- extract_field_statistics_from_ci(ci_band, field_boundaries_sf) + + # DEBUG: Check extraction result on first tile + if (i == 1) { + num_with_data <- sum(!is.na(current_stats$mean_ci)) + message(paste(" [DEBUG] Extracted", nrow(current_stats), "fields, ", num_with_data, "with non-NA data")) + if (num_with_data > 0) { + message(paste(" [DEBUG] Sample mean CIs:", paste(head(current_stats$mean_ci[!is.na(current_stats$mean_ci)], 3), collapse=", "))) + } + } + + # Extract previous week CI statistics if available + previous_stats <- NULL + if (!is.null(previous_ci)) { + previous_stats <- extract_field_statistics_from_ci(previous_ci, field_boundaries_sf) + } + + # Process each field that was extracted + field_results_this_tile <- list() + fields_added <- 0 + + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + tryCatch({ + field_id <- field_boundaries_sf$field[field_idx] + field_sf <- field_boundaries_sf[field_idx, ] + + # Get statistics from helper function results + # current_stats should have same number of rows as field_boundaries_sf + if (field_idx > nrow(current_stats)) { + message(paste(" [ERROR] field_idx", field_idx, "> nrow(current_stats)", nrow(current_stats))) + next + } + + mean_ci_current <- current_stats$mean_ci[field_idx] + pixel_count <- current_stats$pixel_count[field_idx] + + # SKIP fields with no data in this tile (they don't intersect this tile) + if (is.na(pixel_count) || pixel_count == 0) { + next + } + ci_cv_current <- current_stats$cv[field_idx] + ci_percentile_low <- current_stats$p10[field_idx] + ci_percentile_high <- current_stats$p90[field_idx] + + # If field doesn't intersect this tile, mean_ci_current will be NA + if (is.na(mean_ci_current)) { + next # Skip this field - doesn't intersect this tile + } + + field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 + field_area_acres <- field_area_ha / 0.404686 + + # Extract previous week CI if available + mean_ci_previous <- NA + ci_change <- NA + if (!is.null(previous_stats)) { + mean_ci_previous <- previous_stats$mean_ci[field_idx] + if (!is.na(mean_ci_previous)) { + ci_change <- mean_ci_current - mean_ci_previous + } + } + + # Reconstruct pixel values for status trigger (we need the actual pixel array) + # Use the percentiles and mean to create a synthetic distribution for status_trigger + # For now, use mean CI repeated by pixel count for testing + # TODO: Consider extracting pixels directly if needed for more complex triggers + pixel_count <- current_stats$pixel_count[field_idx] + ci_vals_current <- if (pixel_count > 0) { + rep(mean_ci_current, pixel_count) # Simplified: use mean value repeated + } else { + numeric(0) + } + + # Calculate age + age_weeks <- if (!is.null(planting_dates) && nrow(planting_dates) > 0 && field_idx <= nrow(planting_dates)) { + planting_date <- planting_dates$date[field_idx] + if (!is.na(planting_date)) { + as.numeric(difftime(end_date, planting_date, units = "weeks")) + } else { + 0 + } + } else { + 0 + } + + # Get phase and status + phase <- get_phase_by_age(age_weeks) + status_trigger <- get_status_trigger(ci_vals_current, ci_change, age_weeks) + + # Cloud coverage categorization based on CI value + # No data = No image available + # CI 0.01 to 95 = Partial coverage + # CI >= 95 = Clear view + if (is.na(mean_ci_current) || mean_ci_current == 0) { + cloud_category <- "No image available" + # Set all CI metrics to NA since no valid data + ci_change <- NA + ci_cv_current <- NA + ci_percentile_low <- NA + ci_percentile_high <- NA + } else if (mean_ci_current >= 95) { + cloud_category <- "Clear view" + } else { + cloud_category <- "Partial coverage" + } + + # Build result row + result_row <- data.frame( + Field_id = field_id, + Acreage = field_area_acres, + Mean_CI = mean_ci_current, + Mean_CI_prev = mean_ci_previous, + CI_change = ci_change, + CI_CV = ci_cv_current, + CI_percentile_low = ci_percentile_low, + CI_percentile_high = ci_percentile_high, + Age_weeks = age_weeks, + Phase = phase, + Status_trigger = status_trigger, + Cloud_category = cloud_category, + stringsAsFactors = FALSE + ) + + field_results_this_tile[[as.character(field_id)]] <- result_row + fields_added <- fields_added + 1 + + }, error = function(e) { + # Show error for debugging + message(paste(" [FIELD ERROR] Field", field_idx, ":", e$message)) + }) + } + + if (length(field_results_this_tile) > 0) { + all_tile_results[[basename(tile_file)]] <- dplyr::bind_rows(field_results_this_tile) + message(paste(" Extracted", length(field_results_this_tile), "fields from tile (processed", fields_added, "fields total)")) + } else { + message(paste(" WARNING: No fields extracted from this tile (processed", fields_added, "fields, all either NA or errored)")) + } + + }, error = function(e) { + message(paste(" Error processing tile", basename(tile_file), ":", e$message)) + }) + } - field_analysis_df <- dplyr::bind_rows(field_analysis_list) + # Combine all tile results, keeping unique fields (may appear in multiple tiles) + if (length(all_tile_results) == 0) { + stop("No fields extracted from any tiles!") + } + + field_analysis_df <- dplyr::bind_rows(all_tile_results) %>% + distinct(Field_id, .keep_all = TRUE) if (nrow(field_analysis_df) == 0) { stop("No fields analyzed successfully!") @@ -1038,17 +1482,90 @@ main <- function() { cat("\n--- Summary Statistics ---\n") print(summary_statistics_df) - # ========== FARM-LEVEL KPI CALCULATION ========== + # ========== FARM-LEVEL KPI AGGREGATION ========== + # Aggregate the per-field analysis into farm-level summary statistics - farm_kpi_results <- calculate_and_export_farm_kpis( - report_date = end_date, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - harvesting_data = harvesting_data, - cumulative_CI_vals_dir = cumulative_CI_vals_dir, - weekly_CI_mosaic = weekly_CI_mosaic, - reports_dir = reports_dir - ) + cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") + + # Filter to only fields that have actual data (non-NA CI and valid acreage) + field_data <- field_analysis_df %>% + filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% + filter(Acreage > 0) + + if (nrow(field_data) > 0) { + + if (nrow(field_data) > 0) { + # Create summary statistics + farm_summary <- list() + + # 1. PHASE DISTRIBUTION + phase_dist <- field_data %>% + group_by(Phase) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Phase) + + farm_summary$phase_distribution <- phase_dist + + # 2. STATUS TRIGGER DISTRIBUTION + status_dist <- field_data %>% + group_by(Status_trigger) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Status_trigger) + + farm_summary$status_distribution <- status_dist + + # 3. CLOUD COVERAGE DISTRIBUTION + cloud_dist <- field_data %>% + group_by(Cloud_category) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Cloud_category) + + farm_summary$cloud_distribution <- cloud_dist + + # 4. OVERALL STATISTICS + farm_summary$overall_stats <- data.frame( + total_fields = nrow(field_data), + total_acreage = sum(field_data$Acreage, na.rm = TRUE), + mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), + median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), + mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4), + week = current_week, + year = year, + date = as.character(end_date) + ) + + # Print summaries + cat("\n--- PHASE DISTRIBUTION ---\n") + print(phase_dist) + + cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") + print(status_dist) + + cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") + print(cloud_dist) + + cat("\n--- OVERALL FARM STATISTICS ---\n") + print(farm_summary$overall_stats) + + farm_kpi_results <- farm_summary + } else { + farm_kpi_results <- NULL + } + } else { + farm_kpi_results <- NULL + } # ========== FINAL SUMMARY ========== @@ -1063,7 +1580,7 @@ main <- function() { if (!is.null(farm_kpi_results)) { cat("\nFarm-level KPIs: CALCULATED\n") } else { - cat("\nFarm-level KPIs: SKIPPED (kpi_utils.R not available)\n") + cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n") } cat("\n✓ Consolidated KPI calculation complete!\n") diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index a00a5f7..5890a94 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -50,7 +50,7 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL } # PRIORITY 2: File-based detection (fallback if metadata not found) - # Check if merged_final_tif/ contains tile-named files + # Check if merged_final_tif/ contains tile-named files OR grid-size subdirectories if (!dir.exists(merged_final_tif_dir)) { return(list( @@ -61,6 +61,30 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL )) } + # First check if there are grid-size subdirectories (5x5, 10x10, etc.) + # This indicates the tiles are organized: merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif + grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) + + if (length(grid_patterns) > 0) { + # Found grid-size subdirectories - tiles exist! + grid_size <- grid_patterns[1] + grid_dir <- file.path(merged_final_tif_dir, grid_size) + + # List sample tile files from the grid directory + sample_tiles <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)[1:3] + + return(list( + has_tiles = TRUE, + detected_tiles = sample_tiles, + total_files = length(sample_tiles), + source = "grid_subdirectory_detection", + grid_size = grid_size, + grid_path = grid_dir + )) + } + + # Fall back to checking for tile-named files directly in merged_final_tif # List all .tif files in merged_final_tif tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)