From 3ee3f9e31c71f32949d0fc28cbf2ce73f3eb46bb Mon Sep 17 00:00:00 2001 From: Timon Date: Mon, 9 Feb 2026 10:40:01 +0100 Subject: [PATCH] shaving off some more details... --- MANUAL_PIPELINE_RUNNER.R | 613 ++++++++ r_app/10_create_master_grid_and_split_tiffs.R | 499 ------- r_app/10_create_per_field_tiffs.R | 42 +- r_app/10_create_per_field_tiffs_utils.R | 45 +- r_app/20_ci_extraction.R | 366 ----- r_app/20_ci_extraction_per_field.R | 17 +- r_app/21_convert_ci_rds_to_csv.R | 65 +- r_app/30_growth_model_utils.R | 112 +- r_app/40_mosaic_creation.R | 296 ---- r_app/40_mosaic_creation_per_field.R | 7 + r_app/40_mosaic_creation_per_field_utils.R | 32 +- r_app/80_calculate_kpis.R | 156 +- r_app/80_utils_common.R | 138 +- r_app/90_CI_report_with_kpis_simple.Rmd | 9 +- r_app/91_CI_report_with_kpis_Angata.Rmd | 11 +- r_app/parameters_project.R | 3 + r_app/run_full_pipeline.R | 1307 +++++++---------- 17 files changed, 1495 insertions(+), 2223 deletions(-) create mode 100644 MANUAL_PIPELINE_RUNNER.R delete mode 100644 r_app/10_create_master_grid_and_split_tiffs.R delete mode 100644 r_app/20_ci_extraction.R delete mode 100644 r_app/40_mosaic_creation.R diff --git a/MANUAL_PIPELINE_RUNNER.R b/MANUAL_PIPELINE_RUNNER.R new file mode 100644 index 0000000..8bf2ba8 --- /dev/null +++ b/MANUAL_PIPELINE_RUNNER.R @@ -0,0 +1,613 @@ +# ============================================================================== +# SMARTCANE MANUAL PIPELINE RUNNER +# ============================================================================== +# +# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS. +# Do NOT run this script directly - instead, copy individual commands and +# paste them into your PowerShell terminal. 
+# +# This approach allows you to: +# - Run steps one at a time and inspect outputs +# - Re-run failed steps without re-running successful ones +# - Monitor progress between steps +# - Troubleshoot issues more easily than with automated pipeline +# +# ============================================================================== +# PIPELINE SEQUENCE (IN ORDER) +# ============================================================================== +# +# 1. Python: Download Planet satellite imagery (optional - only if new data needed) +# 2. R10: Split farm TIFFs into per-field directory structure +# 3. R20: Extract Chlorophyll Index (CI) from 4-band imagery +# 4. R30: Interpolate growth model (smooth CI time series) +# 5. R21: Convert CI data to CSV format for Python +# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx) +# 7. R40: Create weekly mosaic TIFFs +# 8. R80: Calculate KPIs (field uniformity, trends, stress) +# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply) +# +# ============================================================================== +# BEFORE YOU START +# ============================================================================== +# +# 1. Open PowerShell in the smartcane root directory: +# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\ +# +# 2. Define your parameters ONCE at the top of the session: +# +# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba +# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04) +# $OFFSET = 7 # Days to look back (e.g., 7 for one week) +# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE +# $YEAR = 2026 # ISO year - auto-calculated from END_DATE +# +# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc. 
+# +# ============================================================================== +# COMMAND REFERENCE +# ============================================================================== + +# ============================================================================== +# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL) +# ============================================================================== +# +# PURPOSE: +# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API +# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif +# +# WHEN TO RUN: +# - Only needed if you have new dates to process +# - Pipeline skips dates already in merged_tif/ or field_tiles/ +# - First-time setup: download for your date range +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# DATE: YYYY-MM-DD format (e.g., 2026-02-04) +# RESOLUTION: 3 meters (default) - can also use 5, 10 +# --cleanup: Delete intermediate files after download +# --clear-all: Clear all output folders before downloading +# +# COMMAND #1 - Single Date Download: +# +# cd python_app +# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup +# +# Example: +# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup +# +# COMMAND #2 - Batch Download (Multiple Dates): +# +# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT] +# +# Example: +# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata +# +# EXPECTED OUTPUT: +# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file) +# +# Note: Planet API requires authentication (PLANET_API_KEY environment variable) +# Cost: ~1,500-2,000 PU per date +# +# ============================================================================ + + +# ============================================================================== +# STEP 1: R10 - Create 
Per-Field TIFF Structure +# ============================================================================== +# +# PURPOSE: +# Split farm-wide GeoTIFFs into per-field directory structure. +# Transforms: merged_tif/{DATE}.tif (single file) +# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files) +# This enables clean, scalable processing in downstream scripts. +# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR) +# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif +# - One TIFF per field per date (1185 fields × N dates in Angata) +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata +# +# EXPECTED OUTPUT: +# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files) +# Storage location: laravel_app/storage/app/angata/field_tiles/ +# Script execution time: 5-10 minutes (depends on number of dates) +# +# ============================================================================ + + +# ============================================================================== +# STEP 2: R20 - Extract Chlorophyll Index (CI) +# ============================================================================== +# +# PURPOSE: +# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs. +# Also extracts CI statistics per sub_field for daily tracking. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI) +# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds +# +# EXPECTED BEHAVIOR: +# If field_tiles_CI/ or daily_vals/ missing files, Script 20 will process them +# Script 20 skips files that already exist (to avoid re-processing) +# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end +# OFFSET: Days to look back (e.g., 7 for one week window) +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 +# +# EXPECTED OUTPUT: +# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/ +# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/ +# Storage location: laravel_app/storage/app/angata/field_tiles_CI/ +# Script execution time: 10-20 minutes (depends on number of dates+fields) +# +# NOTES: +# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE +# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates) +# To process all existing merged_tif files: Use large OFFSET (e.g., 365) +# +# TROUBLESHOOTING: +# ❌ If field_tiles_CI has fewer files than field_tiles: +# - Check if all field_tiles/{FIELD}/{DATE}.tif files exist +# - Script 20 may be skipping due to incomplete source files +# - Solution: Delete problematic files from field_tiles and re-run Script 10 +# +# ============================================================================ + + +# ============================================================================== +# 
STEP 3: R30 - Interpolate Growth Model +# ============================================================================== +# +# PURPOSE: +# Smooth CI time series using LOESS interpolation to fill gaps. +# Creates continuous growth curves for each field across all measurement dates. +# Enables trend analysis, yield prediction, and cumulative growth metrics. +# +# INPUT: +# - Daily CI statistics from Script 20 (field_tiles_CI/ per-field RDS files) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# - (This is the growth model output used by Script 21 and 80) +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata +# +# EXPECTED OUTPUT: +# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds +# Contains: Interpolated CI data for all fields (wide format) +# Script execution time: 5-15 minutes +# +# ============================================================================ + + +# ============================================================================== +# STEP 4: R21 - Convert CI RDS to CSV (Python Format) +# ============================================================================== +# +# PURPOSE: +# Convert growth model output from R's RDS format to Python-compatible CSV. +# Transforms from wide format (fields × dates) to long format (one row per field-date pair). +# Prepares data for Python harvest detection models. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# (Output from Script 30) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv +# - Columns: field, sub_field, Date, FitData, DOY, value +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata +# +# EXPECTED OUTPUT: +# File: ci_data_for_python.csv (~5-10 MB) +# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows) +# Script execution time: 1-2 minutes +# +# ============================================================================ + + +# ============================================================================== +# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL) +# ============================================================================== +# +# PURPOSE: +# Predict which fields are approaching harvest in the next 28 days. +# Uses neural network (Model 307) trained on historical harvest dates. +# Generates weekly probability scores for operational harvest scheduling. 
+# +# REQUIRES: +# - harvest.xlsx with field planting/harvest dates +# - ci_data_for_python.csv from Script 21 +# - PyTorch environment (conda pytorch_gpu) +# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx +# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv +# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT] +# +# Example: +# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata +# +# EXPECTED OUTPUT: +# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv +# Rows: One per field (e.g., 1185 rows for Angata) +# Script execution time: 2-5 minutes +# +# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete +# +# ============================================================================ + + +# ============================================================================== +# STEP 6: R40 - Create Weekly Mosaic TIFFs +# ============================================================================== +# +# PURPOSE: +# Aggregate daily per-field CI TIFFs into weekly mosaics. +# Handles multiple dates (full week) with maximum CI value per pixel. +# Creates 5-band output for reporting and KPI calculations. 
+# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif +# (Daily per-field CI TIFFs from Script 20) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif +# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files) +# +# PARAMETERS: +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week +# OFFSET: Days to look back (e.g., 7 for one week window) +# PROJECT: angata, chemba, xinavane, esa, simba +# +# COMMAND: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT] +# +# Example (one week window): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata +# +# Example (two week window): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 14 angata +# +# EXPECTED OUTPUT: +# Location: laravel_app/storage/app/angata/weekly_mosaic/ +# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif +# Files created: #fields (e.g., 1185 for Angata) +# Storage: ~50-100 MB total for all mosaic TIFFs +# Script execution time: 5-10 minutes +# +# NOTE: Files are named with ISO week number (WW) and year (YYYY) +# Week calculation is automatic based on END_DATE +# +# ============================================================================ + + +# ============================================================================== +# STEP 7: R80 - Calculate Key Performance Indicators (KPIs) +# ============================================================================== +# +# PURPOSE: +# Calculate per-field metrics from weekly mosaic TIFFs: +# - Field uniformity (CV - Coefficient of Variation) +# - Growth trends (4-week and 8-week) +# - Area change detection +# - TCH forecast +# - Spatial clustering (weed/stress detection) +# - Generates Excel export for dashboards and reporting +# +# INPUT: +# - 
laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif +# - Field boundaries (pivot.geojson) +# - Harvest data (harvest.xlsx) +# - Historical stats cache (RDS from previous weeks) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats) +# - 21 columns with field-level KPIs and alerts +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# WEEK: ISO week number (1-53, optional - default current week) +# YEAR: ISO year (optional - default current year) +# +# COMMAND #1 - Current Week (Auto-detects from TODAY): +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] +# +# Example: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata +# +# COMMAND #2 - Specific Week & Year: +# +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] [WEEK] [YEAR] +# +# Example (Week 5, Year 2026): +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026 +# +# EXPECTED OUTPUT: +# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# Rows: One per field (e.g., 1185 for Angata) +# Columns: 21 KPI columns (uniformity, trend, alerts, etc.) +# Location: laravel_app/storage/app/angata/output/ +# Script execution time: 10-20 minutes +# +# EXPECTED COLUMNS: +# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac, +# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence, +# spatial_cluster, alert_urgency, alert_type, alert_message, etc. 
+# +# ============================================================================ + + +# ============================================================================== +# STEP 8: R90/R91 - Generate Word Report (OPTIONAL) +# ============================================================================== +# +# PURPOSE: +# Generate formatted Word report (.docx) with: +# - KPI summary tables and charts +# - Per-field performance metrics +# - Alerts and recommendations +# - Interpretation guides +# +# Client-Specific Reports: +# - R90: Agronomic Support (for AURA project) +# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA) +# +# INPUT: +# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# (from Script 80) +# +# OUTPUT: +# - laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_*.docx +# - Formatted Word document (~5-10 MB) +# +# PARAMETERS: +# PROJECT: angata, chemba, xinavane, esa, simba +# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) +# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render) +# +# COMMAND #1 - AGRONOMIC REPORT (AURA project): +# From R console or R script: +# +# rmarkdown::render( +# "r_app/90_CI_report_with_kpis_simple.Rmd", +# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")), +# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx", +# output_dir = "laravel_app/storage/app/angata/reports" +# ) +# +# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA): +# From R console or R script: +# +# rmarkdown::render( +# "r_app/91_CI_report_with_kpis_Angata.Rmd", +# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")), +# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx", +# output_dir = "laravel_app/storage/app/angata/reports" +# ) +# +# EXPECTED OUTPUT: +# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx +# Location: laravel_app/storage/app/{PROJECT}/reports/ +# Script execution time: 5-10 minutes +# +# 
NOTE: +# These are R Markdown files and cannot be run directly via Rscript +# Use rmarkdown::render() from an R interactive session or wrapper script +# See run_full_pipeline.R for an automated example +# +# ============================================================================ + + +# ============================================================================== +# QUICK REFERENCE: Common Workflows +# ============================================================================== +# +# WORKFLOW A: Weekly Update (Most Common) +# ───────────────────────────────────────────────────────────────────────── +# Goal: Process latest week of data through full pipeline +# +# Parameters: +# $PROJECT = "angata" +# $END_DATE = "2026-02-04" # Today or latest date available +# $OFFSET = 7 # One week back +# +# Steps: +# 1. SKIP Python download (if you already have data) +# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata +# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 +# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata +# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata +# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata +# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata +# 8. 
OPTIONAL R91 (Cane Supply) - Use automated runner: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R +# OR from R console: +# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd", +# params=list(data_dir="angata", report_date=as.Date("2026-02-04")), +# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx", +# output_dir="laravel_app/storage/app/angata/reports") +# +# Execution time: ~60-90 minutes total +# +# +# WORKFLOW B: Initial Setup (Large Backfill) +# ───────────────────────────────────────────────────────────────────────── +# Goal: Process multiple weeks of historical data +# +# Steps: +# 1. Python download (your entire date range) +# 2. Run R10 once (processes all dates) +# 3. Run R20 with large offset to process all historical dates: +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365 +# (This processes from 2025-02-04 to 2026-02-04, covering entire year) +# 4. Run R30 once (growth model full season) +# 5. Run R21 once (CSV export) +# 6. Run R40 with specific week windows as needed +# 7. Run R80 for each week you want KPIs for + +# 8. 
For each week, run: +# - R40 with different END_DATE values (one per week) +# - R80 with different WEEK/YEAR values (one per week) +# - R91 optional (one per week report) +# +# Pro tip: Script R40 with offset=14 covers two weeks at once +# Then R40 again with offset=7 for just one week +# +# +# WORKFLOW C: Troubleshooting (Check Intermediate Outputs) +# ───────────────────────────────────────────────────────────────────────── +# Goal: Verify outputs before moving to next step +# +# After R10: Check field_tiles/{FIELD_ID}/ has #dates files +# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files +# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds +# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week +# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx +# +# ============================================================================ + +# ============================================================================== +# TROUBLESHOOTING +# ============================================================================== +# +# ISSUE: R20 not processing all field_tiles files +# ──────────────────────────────────────────────── +# Symptom: field_tiles has 496 files, field_tiles_CI only has 5 +# +# Possible causes: +# 1. Source files incomplete or corrupted +# 2. Script 20 skips because CI TIFF already exists (even if incomplete) +# 3. Partial run from previous attempt +# +# Solutions: +# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!) +# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/* +# 2. Re-run Script 20 +# 3. If still fails, delete field_tiles_CI completely and re-run Script 20 +# rm -r laravel_app/storage/app/angata/field_tiles_CI/ +# +# ISSUE: Script 80 says "No per-field mosaic files found" +# ──────────────────────────────────────────────────────── +# Symptom: R80 fails to calculate KPIs +# +# Possible causes: +# 1. 
Script 40 hasn't run yet (weekly_mosaic doesn't exist) +# 2. Wrong END_DATE or WEEK/YEAR combination +# 3. weekly_mosaic/{FIELD}/ directory missing (old format?) +# +# Solutions: +# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif +# 2. Verify END_DATE is within date range of available CI data +# 3. For current week: End date must be THIS week (same ISO week as today) +# +# ISSUE: Python download fails ("Not authorized") +# ──────────────────────────────────────────────── +# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error +# +# Cause: PLANET_API_KEY environment variable not set +# +# Solution: +# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here" +# 2. Verify: $env:PLANET_API_KEY (should show your key) +# 3. Try download again +# +# ISSUE: R30 takes too long +# ────────────────────────── +# Symptom: Script 30 running for >30 minutes +# +# Cause: LOESS interpolation is slow with many dates/fields +# +# Solution: +# 1. This is normal - large date ranges slow down interpolation +# 2. Subsequent runs are faster (cached results) +# 3. 
If needed: reduce offset or run fewer weeks at a time +# +# ============================================================================== + +# ============================================================================== +# SUMMARY OF FILES CREATED BY EACH SCRIPT +# ============================================================================== +# +# Script 10 creates: +# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif +# +# Script 20 creates: +# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif +# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds +# +# Script 30 creates: +# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# +# Script 21 creates: +# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv +# +# Python 31 creates: +# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv +# +# Script 40 creates: +# laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/week_{WW}_{YYYY}.tif +# +# Script 80 creates: +# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx +# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds +# +# Script 90/91 creates: +# laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx +# +# ============================================================================== diff --git a/r_app/10_create_master_grid_and_split_tiffs.R b/r_app/10_create_master_grid_and_split_tiffs.R deleted file mode 100644 index fdab65c..0000000 --- a/r_app/10_create_master_grid_and_split_tiffs.R +++ /dev/null @@ -1,499 +0,0 @@ -#' Combined: Create master grid and split TIFFs into tiles -#' ==================================================================== -#' -#' Purpose: -#' 1. Check all daily TIFFs for matching extents -#' 2. Create master 5×5 grid covering all TIFFs -#' 3. 
Split each daily TIFF into 25 tiles using the master grid -#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif -#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18 - - -library(terra) -library(sf) - -# ============================================================================ -# CONFIGURATION & COMMAND-LINE ARGUMENTS -# ============================================================================ - -# Parse command-line arguments for date filtering -args <- commandArgs(trailingOnly = TRUE) - -# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17 -start_date <- NULL -end_date <- NULL - -if (length(args) >= 1) { - start_date <- as.Date(args[1]) - cat("Filtering: start date =", as.character(start_date), "\n") -} - -if (length(args) >= 2) { - end_date <- as.Date(args[2]) - cat("Filtering: end date =", as.character(end_date), "\n") -} - -PROJECT <- "angata" -TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b") - -# GRID SIZE CONFIGURATION - Change this to use different grid sizes -# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc. -# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc. 
-GRID_NROWS <- 5 -GRID_NCOLS <- 5 - -# Construct grid-specific subfolder path -GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS) -OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL) - -# Load field boundaries for overlap checking -GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson") - -cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "") -cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") - -# ============================================================================ -# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID -# ============================================================================ - -cat("\n[PART 1] Creating Master Grid\n") - -# Load field boundaries for overlap checking -cat("\n[1] Checking for existing master grid...\n") - -# Check if master grid already exists -MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")) - -if (file.exists(MASTER_GRID_PATH)) { - cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "") - master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE) - field_boundaries_sf <- NULL # No need to load pivot.geojson - field_boundaries_vect <- NULL - - cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "") - -} else { - # No existing grid - need to create one from pivot.geojson - cat(" No existing grid found. 
Creating new one from pivot.geojson...\n") - - if (!file.exists(GEOJSON_PATH)) { - stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n", - "Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ", - "from the same directory as a previous successful run (grid already exists).") - } - - field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE) - field_boundaries_vect <- terra::vect(GEOJSON_PATH) - - cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "") -} - -# Try to find a name column (only if field_boundaries_sf exists) -if (!is.null(field_boundaries_sf)) { - field_names <- NA - if ("name" %in% names(field_boundaries_sf)) { - field_names <- field_boundaries_sf$name - } else if ("field" %in% names(field_boundaries_sf)) { - field_names <- field_boundaries_sf$field - } else if ("field_name" %in% names(field_boundaries_sf)) { - field_names <- field_boundaries_sf$field_name - } else { - field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices - } - - cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "") -} - -# Helper function: Check if a tile overlaps with any field (simple bbox overlap) -tile_overlaps_fields <- function(tile_extent, field_geoms) { - tryCatch({ - # Simple bounding box overlap test - no complex geometry operations - # Two boxes overlap if: NOT (box1.xmax < box2.xmin OR box1.xmin > box2.xmax OR - # box1.ymax < box2.ymin OR box1.ymin > box2.ymax) - - # For each field geometry, check if it overlaps with tile bbox - for (i in seq_len(length(field_geoms))) { - # Skip empty geometries - if (st_is_empty(field_geoms[i])) { - next - } - - # Get field bbox - field_bbox <- st_bbox(field_geoms[i]) - - # Check bbox overlap (simple coordinate comparison) - x_overlap <- !(tile_extent$xmax < field_bbox$xmin || tile_extent$xmin > field_bbox$xmax) - y_overlap <- !(tile_extent$ymax < field_bbox$ymin || tile_extent$ymin > field_bbox$ymax) - - if (x_overlap && y_overlap) { - return(TRUE) # Found 
overlap! - } - } - - return(FALSE) # No overlap found - - }, error = function(e) { - cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "") - return(TRUE) # Default to including tile if there's an error - }) -} - -cat("\n[2] Checking TIFF extents...\n") - -tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE) -tiff_files <- sort(tiff_files) - -# Filter by date range if specified -if (!is.null(start_date) || !is.null(end_date)) { - cat("\nApplying date filter...\n") - - file_dates <- as.Date(sub("\\.tif$", "", tiff_files)) - - if (!is.null(start_date) && !is.null(end_date)) { - keep_idx <- file_dates >= start_date & file_dates <= end_date - cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "") - } else if (!is.null(start_date)) { - keep_idx <- file_dates >= start_date - cat(" From: ", as.character(start_date), "\n", sep = "") - } else { - keep_idx <- file_dates <= end_date - cat(" Until: ", as.character(end_date), "\n", sep = "") - } - - tiff_files <- tiff_files[keep_idx] - cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "") -} - -if (length(tiff_files) == 0) { - stop("No TIFF files found in ", TIFF_FOLDER) -} - -cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "") -cat(" Checking extents... 
(this may take a while)\n") - -# Load all extents - ONE TIME, upfront -extents <- list() -for (i in seq_along(tiff_files)) { - tiff_path <- file.path(TIFF_FOLDER, tiff_files[i]) - raster <- terra::rast(tiff_path) - ext <- terra::ext(raster) - extents[[i]] <- ext - - # Progress indicator every 50 files - if (i %% 50 == 0) { - cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "") - } -} - -cat(" ✓ All extents loaded\n") - -# Check if all extents match -cat("\n[3] Comparing extents...\n") - -tolerance <- 1e-8 -all_match <- TRUE -first_ext <- extents[[1]] - -for (i in 2:length(extents)) { - curr_ext <- extents[[i]] - match <- ( - abs(curr_ext$xmin - first_ext$xmin) < tolerance && - abs(curr_ext$xmax - first_ext$xmax) < tolerance && - abs(curr_ext$ymin - first_ext$ymin) < tolerance && - abs(curr_ext$ymax - first_ext$ymax) < tolerance - ) - if (!match) { - all_match <- FALSE - cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "") - cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ", - "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "") - cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ", - "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "") - } -} - -if (all_match) { - cat(" ✓ All TIFF extents MATCH perfectly!\n") -} else { - cat(" ⚠️ Extents differ - creating master extent covering all\n") -} - -# Create master extent -cat("\n[4] Creating master extent...\n") - -master_xmin <- min(sapply(extents, function(e) e$xmin)) -master_xmax <- max(sapply(extents, function(e) e$xmax)) -master_ymin <- min(sapply(extents, function(e) e$ymin)) -master_ymax <- max(sapply(extents, function(e) e$ymax)) - -x_range_m <- (master_xmax - master_xmin) * 111320 -y_range_m <- (master_ymax - master_ymin) * 111320 - -cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ", - "Y [", round(master_ymin, 6), 
", ", round(master_ymax, 6), "]\n", sep = "") -cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "") - -# Auto-determine grid size based on ROI dimensions -if (x_range_m < 10000 && y_range_m < 10000) { - cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n") - GRID_NROWS <- 1 - GRID_NCOLS <- 1 -} else { - cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n") - GRID_NROWS <- 5 - GRID_NCOLS <- 5 -} - -N_TILES <- GRID_NROWS * GRID_NCOLS - -# Check if master grid already exists -cat("\n[5] Checking if master grid exists...\n") - -master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")) - -if (file.exists(master_grid_file)) { - cat(" ✓ Master grid exists! Loading existing grid...\n") - master_grid_sf <- st_read(master_grid_file, quiet = TRUE) - master_grid_vect <- terra::vect(master_grid_file) - cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "") -} else { - cat(" Grid does not exist. 
Creating new master grid...\n") - - # Create 5×5 grid - cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "") - - master_bbox <- st_bbox(c( - xmin = master_xmin, - xmax = master_xmax, - ymin = master_ymin, - ymax = master_ymax - ), crs = 4326) - - bbox_sf <- st_as_sfc(master_bbox) - - master_grid <- st_make_grid( - bbox_sf, - n = c(GRID_NCOLS, GRID_NROWS), - what = "polygons" - ) - - master_grid_sf <- st_sf( - tile_id = sprintf("%02d", 1:length(master_grid)), - geometry = master_grid - ) - - cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "") - - # Convert to SpatVector for use in makeTiles - master_grid_vect <- terra::vect(master_grid_sf) - - # Save master grid - if (!dir.exists(OUTPUT_FOLDER)) { - dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE) - } - st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE) - cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") -} - -# ============================================================================ -# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES) -# ============================================================================ - -cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n") - -# If grid was loaded from file, it's already filtered. Skip filtering. 
-if (!file.exists(MASTER_GRID_PATH)) { - cat("\n[7] Filtering master grid to only overlapping tiles...\n") - - # Check which tiles overlap with any field - overlapping_tile_indices <- c() - for (tile_idx in 1:nrow(master_grid_sf)) { - tile_geom <- master_grid_sf[tile_idx, ] - - # Check overlap with any field - if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) { - overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx) - } - } - - cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "") - cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "") - - # Create filtered grid with only overlapping tiles - filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ] - filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices) -} else { - cat("\n[7] Using pre-filtered grid (already loaded from file)...\n") - # Grid was already loaded - it's already filtered - filtered_grid_sf <- master_grid_sf -} - -# Convert to SpatVector for makeTiles -filtered_grid_vect <- terra::vect(filtered_grid_sf) - -cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "") - -# ============================================================================ -# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE) -# ============================================================================ - -cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n") -cat("\n[8] Processing each date independently...\n") -cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n") - -total_tiles_created <- 0 -dates_skipped <- 0 -dates_processed <- 0 - -for (file_idx in seq_along(tiff_files)) { - tiff_file <- tiff_files[file_idx] - date_str <- gsub("\\.tif$", "", tiff_file) - - # Create date-specific output folder - date_output_folder <- file.path(OUTPUT_FOLDER, date_str) - - # CHECK: 
Skip if date already processed (RESUME-SAFE) - if (dir.exists(date_output_folder)) { - existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$") - existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)] - - if (length(existing_tiles) > 0) { - cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str, - " (", length(existing_tiles), " tiles already exist)\n", sep = "") - dates_skipped <- dates_skipped + 1 - next # Skip this date - } - } - - cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "") - dates_processed <- dates_processed + 1 - - # Load TIFF for this date only - tiff_path <- file.path(TIFF_FOLDER, tiff_file) - raster <- terra::rast(tiff_path) - - dims <- dim(raster) - cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "") - - # Create date-specific output folder - if (!dir.exists(date_output_folder)) { - dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE) - } - - cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "") - - # Use makeTiles with FILTERED grid (only overlapping tiles) - tiles_list <- terra::makeTiles( - x = raster, - y = filtered_grid_vect, - filename = file.path(date_output_folder, "tile.tif"), - overwrite = TRUE - ) - - # Rename tiles to [DATE]_[TILE_ID].tif - for (tile_idx in seq_along(tiles_list)) { - source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif")) - tile_id <- filtered_grid_sf$tile_id[tile_idx] - final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif")) - - if (file.exists(source_file)) { - file.rename(source_file, final_file) - } - } - - cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "") - total_tiles_created <- total_tiles_created + length(tiles_list) -} - -# ============================================================================ -# VERIFICATION -# ============================================================================ - -cat("\n[9] Verifying 
output...\n") - -# Count tiles per date folder -date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE) -date_folders <- sort(date_folders[date_folders != "."]) - -total_tile_files <- 0 -for (date_folder in date_folders) { - tiles_in_folder <- list.files(file.path(OUTPUT_FOLDER, date_folder), - pattern = "\\.tif$") - tiles_in_folder <- tiles_in_folder[!grepl("master_grid", tiles_in_folder)] - total_tile_files <- total_tile_files + length(tiles_in_folder) - cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "") -} - -# ============================================================================ -# SUMMARY -# ============================================================================ - -cat("\n\n========== SUMMARY ==========\n") - -cat("\nGrid Configuration:\n") -cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "") -cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") -cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") - -cat("\nField Filtering:\n") -cat(" - Field boundaries loaded from pivot.geojson\n") -cat(" - Only overlapping tiles created (empty tiles deleted)\n") -cat(" - Significant storage savings for sparse fields!\n") - -cat("\nProcessing Summary:\n") -cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "") -cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "") -cat(" - Dates processed: ", dates_processed, "\n", sep = "") -cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "") -if (dates_processed > 0) { - avg_tiles_per_date <- total_tiles_created / dates_processed - cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "") -} - -cat("\nDirectory Structure:\n") -cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "") -cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "") -cat(" ├── master_grid_", GRID_SIZE_LABEL, 
".geojson\n", sep = "") -cat(" ├── 2024-01-15/\n") -cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n") -cat(" │ ├── 2024-01-15_05.tif\n") -cat(" │ └── ...\n") -cat(" ├── 2024-01-16/\n") -cat(" │ └── ...\n") -cat(" └── ...\n") - -cat("\n⭐ Key Benefits:\n") -cat(" ✓ Overlap-filtered: No wasted empty tiles\n") -cat(" ✓ Skip existing dates: Resume-safe, idempotent\n") -cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n") -cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n") - -# ============================================================================ -# WRITE TILING CONFIGURATION METADATA -# ============================================================================ -# This metadata file is read by parameters_project.R to determine mosaic mode -# It allows script 40 to know what script 10 decided without re-computing - -cat("\n[10] Writing tiling configuration metadata...\n") - -config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json") -config_json <- paste0( - '{\n', - ' "project": "', PROJECT, '",\n', - ' "has_tiles": ', tolower(N_TILES > 1), ',\n', - ' "grid_size": "', GRID_SIZE_LABEL, '",\n', - ' "grid_rows": ', GRID_NROWS, ',\n', - ' "grid_cols": ', GRID_NCOLS, ',\n', - ' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n', - ' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n', - ' "created_date": "', Sys.Date(), '",\n', - ' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n', - '}\n' -) - -writeLines(config_json, config_file) -cat(" ✓ Metadata saved to: tiling_config.json\n") -cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "") -cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "") - -cat("\n✓ Script complete!\n") diff --git a/r_app/10_create_per_field_tiffs.R b/r_app/10_create_per_field_tiffs.R index 9c32cf1..e192dd7 100644 --- a/r_app/10_create_per_field_tiffs.R +++ b/r_app/10_create_per_field_tiffs.R @@ -68,7 +68,14 @@ main <- function() { setwd("..") } - # STEP 2: SOURCE ALL UTILITY SCRIPTS (before any 
operations) + # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R) + args <- commandArgs(trailingOnly = TRUE) + project_dir <- if (length(args) == 0) "angata" else args[1] + + # Make project_dir available to sourced files (they execute in global scope) + assign("project_dir", project_dir, envir = .GlobalEnv) + + # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined) # Load parameters_project.R (provides safe_log, setup_project_directories, etc.) tryCatch({ source("r_app/parameters_project.R") @@ -85,12 +92,31 @@ main <- function() { stop(e) }) - # STEP 3: Parse command-line arguments - args <- commandArgs(trailingOnly = TRUE) - project_dir <- if (length(args) == 0) "angata" else args[1] + # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign()) + # These control which dates Script 10 processes from merged_tif/ + # Window: end_date - offset days to end_date + # Always coerce to correct types to avoid issues with lingering/inherited values + if (!exists("end_date") || !inherits(end_date, "Date")) { + end_date <- as.Date("2026-02-04") + safe_log(paste("Using default end_date:", end_date), "INFO") + } + if (!exists("offset") || !is.numeric(offset)) { + offset <- 7 + safe_log(paste("Using default offset:", offset, "days"), "INFO") + } - # STEP 4: Now all utilities are loaded, proceed with script logic - # Load centralized path structure (creates all directories automatically) + # Ensure offset is numeric (in case it came in as a character string from environment) + if (is.character(offset)) { + offset <- as.numeric(offset) + } + + # Calculate date window for processing + start_date <- end_date - offset + date_window <- seq(start_date, end_date, by = "day") + date_window_str <- format(date_window, "%Y-%m-%d") + safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO") + + # STEP 5: Load centralized path structure (creates all 
directories automatically) paths <- setup_project_directories(project_dir) safe_log(paste("Project:", project_dir)) @@ -109,7 +135,9 @@ main <- function() { # PHASE 1: Process new downloads (always runs) # Pass field_tiles_ci_dir so it can skip dates already migrated - process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir) + # Also pass end_date and offset so only dates in window are processed + process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir, + end_date = end_date, offset = offset) safe_log("\n========================================", "INFO") safe_log("FINAL SUMMARY", "INFO") diff --git a/r_app/10_create_per_field_tiffs_utils.R b/r_app/10_create_per_field_tiffs_utils.R index 36f4eb7..38719e5 100644 --- a/r_app/10_create_per_field_tiffs_utils.R +++ b/r_app/10_create_per_field_tiffs_utils.R @@ -156,6 +156,11 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { #' TIFFs are stored. If provided, skips dates #' already processed and moved to field_tiles_CI/. #' Default: NULL (process all dates). +#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD). +#' Default: NULL (process all available dates). +#' @param offset Integer. Optional. Number of days to look back from end_date. +#' Only used if end_date is also provided. +#' Default: NULL (process all available dates). #' #' @return List with elements: #' - total_created: Integer. 
Total field TIFFs created across all dates @@ -187,7 +192,8 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { #' result$total_created, result$total_skipped, result$total_errors)) #' } #' -process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) { +process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL, + end_date = NULL, offset = NULL) { safe_log("\n========================================", "INFO") safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO") @@ -211,6 +217,19 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel full.names = TRUE ) + # FILTER by date window if end_date and offset provided + if (!is.null(end_date) && !is.null(offset)) { + start_date <- end_date - offset + date_range <- seq(start_date, end_date, by = "day") + date_range_str <- format(date_range, "%Y-%m-%d") + + # Extract dates from filenames and filter + tiff_dates <- gsub("\\.tif$", "", basename(tiff_files)) + tiff_files <- tiff_files[tiff_dates %in% date_range_str] + + safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO") + } + safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO") if (length(tiff_files) == 0) { @@ -226,7 +245,7 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel for (tif_path in tiff_files) { tif_date <- gsub("\\.tif$", "", basename(tif_path)) - # MIGRATION MODE CHECK: Skip if this date was already migrated to field_tiles_CI/ + # CHECK 1: Skip if this date was already migrated to field_tiles_CI/ # (This means Script 20 already processed it and extracted RDS) if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) { # Check if ANY field has this date in field_tiles_CI/ @@ -249,6 +268,28 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel } } + # CHECK 2: 
Skip if this date already exists in field_tiles/ + # (means this date has already been processed through Script 10) + if (dir.exists(field_tiles_dir)) { + date_exists_in_field_tiles <- FALSE + + # Check if ANY field directory has this date + field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + for (field_dir in field_dirs) { + potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) + if (file.exists(potential_file)) { + date_exists_in_field_tiles <- TRUE + break + } + } + + if (date_exists_in_field_tiles) { + safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO") + total_skipped <- total_skipped + 1 + next + } + } + safe_log(paste("Processing:", tif_date), "INFO") result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) diff --git a/r_app/20_ci_extraction.R b/r_app/20_ci_extraction.R deleted file mode 100644 index 17186a5..0000000 --- a/r_app/20_ci_extraction.R +++ /dev/null @@ -1,366 +0,0 @@ -# ============================================================================ -# SCRIPT 20: Canopy Index (CI) Extraction from Satellite Imagery -# ============================================================================ -# PURPOSE: -# Extract Canopy Index (CI) from 4-band or 8-band satellite imagery and -# mask by field boundaries. Supports automatic band detection, cloud masking -# with UDM2 (8-band), and per-field CI value extraction. Produces both -# per-field TIFFs and consolidated CI statistics for growth model input. 
-# -# INPUT DATA: -# - Source: laravel_app/storage/app/{project}/field_tiles/{FIELD}/{DATE}.tif -# - Format: GeoTIFF (4-band RGB+NIR from Planet API, or 8-band with UDM2) -# - Requirement: Field boundaries (pivot.geojson) for masking -# -# OUTPUT DATA: -# - Destination: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif -# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32) -# - Also exports: combined_CI/combined_CI_data.rds (wide format: fields × dates) -# -# USAGE: -# Rscript 20_ci_extraction.R [end_date] [offset] [project] [data_source] -# -# Example (Windows PowerShell): -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif -# -# PARAMETERS: -# - end_date: End date for processing (character, YYYY-MM-DD format) -# - offset: Days to look back from end_date (numeric, default 7) -# - project: Project name (character) - angata, chemba, xinavane, esa, simba -# - data_source: Data source directory (character, optional) - "merged_tif" (default), "merged_tif_8b", "merged_final_tif" -# -# CLIENT TYPES: -# - cane_supply (ANGATA): Yes - core data processing -# - agronomic_support (AURA): Yes - supports field health monitoring -# -# DEPENDENCIES: -# - Packages: terra, sf, tidyverse, lubridate, readxl, furrr, future -# - Utils files: parameters_project.R, 00_common_utils.R, 20_ci_extraction_utils.R -# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx) -# - Data directories: field_tiles/, field_tiles_CI/, combined_CI/ -# -# NOTES: -# - CI formula: (NIR - Red) / (NIR + Red); normalized to 0-5 range -# - 8-band data automatically cloud-masked using UDM2 (band 7-8) -# - 4-band data assumes clear-sky Planet PSScene imagery -# - Parallel processing via furrr for speed optimization -# - Output RDS uses wide format (fields as rows, dates as columns) for growth model -# - Critical dependency for Script 30 (growth model) and Script 80 (KPIs) -# -# RELATED ISSUES: -# SC-112: Utilities 
restructuring -# SC-108: Core pipeline improvements -# -# ============================================================================ - - -# 1. Load required packages -# ----------------------- -suppressPackageStartupMessages({ - # Spatial data handling - library(sf) # For reading/manipulating field boundaries (GeoJSON) - library(terra) # For raster operations (CI extraction from TIFFs) - - # Data manipulation - library(tidyverse) # For dplyr, ggplot2, readr (data wrangling and visualization) - library(lubridate) # For date/time operations (parsing satellite dates) - - # File I/O - library(readxl) # For reading harvest.xlsx (harvest dates for field mapping) - library(here) # For relative path resolution (platform-independent file paths) -}) - -# 2. Process command line arguments -# ------------------------------ -main <- function() { - # Capture command line arguments - args <- commandArgs(trailingOnly = TRUE) - - # Process end_date argument - if (length(args) >= 1 && !is.na(args[1]) && args[1] != "") { - # Parse date explicitly in YYYY-MM-DD format from command line - end_date <- as.Date(args[1], format = "%Y-%m-%d") - if (is.na(end_date)) { - warning("Invalid end_date provided. Using default (current date).") - end_date <- Sys.Date() - #end_date <- "2023-10-01" - } - } else { - end_date <- Sys.Date() - #end_date <- "2023-10-01" - } - - # Process offset argument - if (length(args) >= 2 && !is.na(args[2])) { - offset <- as.numeric(args[2]) - if (is.na(offset) || offset <= 0) { - warning("Invalid offset provided. 
Using default (7 days).") - offset <- 7 - } - } else { - offset <- 7 - } - - # Process project_dir argument - if (length(args) >= 3 && !is.na(args[3])) { - project_dir <- as.character(args[3]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - project_dir <- "angata" # Changed default from "aura" to "esa" - } - - # Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif) - if (length(args) >= 4 && !is.na(args[4])) { - data_source <- as.character(args[4]) - # Validate data_source is a recognized option - if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) { - warning(paste("Data source", data_source, "not in standard list. Using as-is.")) - } - } else if (exists("data_source", envir = .GlobalEnv)) { - data_source <- get("data_source", envir = .GlobalEnv) - } else { - data_source <- "merged_tif_8b" # Default to 8-band (newer data with cloud masking) - } - - # Make project_dir and data_source available globally - assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) - - cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n", - project_dir, format(end_date, "%Y-%m-%d"), offset, data_source)) - - # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction) - ci_extraction_script <- TRUE - assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv) - - # 3. 
Initialize project configuration - # -------------------------------- - new_project_question <- TRUE - - cat("[DEBUG] Attempting to source r_app/parameters_project.R\n") - tryCatch({ - source("r_app/parameters_project.R") - cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n") - }, error = function(e) { - cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n") - stop(e) - }) - - # Load centralized path structure (creates all directories automatically) - paths <- setup_project_directories(project_dir) - - cat("[DEBUG] Attempting to source r_app/00_common_utils.R\n") - tryCatch({ - source("r_app/00_common_utils.R") - cat("[DEBUG] Successfully sourced r_app/00_common_utils.R\n") - }, error = function(e) { - cat("[ERROR] Failed to source r_app/00_common_utils.R:\n", e$message, "\n") - stop(e) - }) - - cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n") - tryCatch({ - source("r_app/20_ci_extraction_utils.R") - cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n") - }, error = function(e) { - cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n") - stop(e) - }) - - - # 4. Generate date list for processing - # --------------------------------- - dates <- date_list(end_date, offset) - log_message(paste("Processing data for week", dates$week, "of", dates$year)) - - # 4a. 
CHECK DAILY CI EXTRACTION - Skip dates that already have extracted files - # ------------------------------------------------------------------------- - log_message("\n===== CHECKING DAILY CI EXTRACTION STATUS =====") - - # Check which dates already have extracted CI files - already_extracted <- c() - missing_extraction <- c() - - if (dir.exists(daily_CI_vals_dir)) { - existing_ci_files <- list.files(daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$") - # Extract dates from filenames like "extracted_2025-12-31_quadrant.rds" - already_extracted <- sub("^extracted_(.+)_.*\\.rds$", "\\1", existing_ci_files) - } - - # Find which dates in our processing range need extraction - missing_extraction <- dates$days_filter[!(dates$days_filter %in% already_extracted)] - - cat(sprintf("[CI CHECK] Already extracted: %d dates\n", length(already_extracted))) - cat(sprintf("[CI CHECK] Need extraction: %d dates (from %s to %s)\n", - length(missing_extraction), - if(length(missing_extraction) > 0) min(missing_extraction) else "N/A", - if(length(missing_extraction) > 0) max(missing_extraction) else "N/A")) - - # If any dates need extraction, we'll extract them - # If NO dates need extraction, we'll skip extraction but ALWAYS rebuild combined_CI_data.rds - skip_extraction <- (length(missing_extraction) == 0) - - if (skip_extraction) { - log_message("✓ All dates in processing range already have extracted CI files - skipping extraction") - log_message("⚠ Will rebuild combined_CI_data.rds to ensure completeness") - } - - # 4b. 
CHECK SOURCE DATA AVAILABILITY - # --------------------------------------------------------------- - # Verify that source data exists for dates we're going to extract - # If a date is missing from source, we'll skip it gracefully - log_message("\n===== CHECKING SOURCE DATA AVAILABILITY =====") - - dates_with_source <- c() - dates_missing_source <- c() - - if (!skip_extraction && length(missing_extraction) > 0) { - # Check which source dates are actually available - for (date_str in missing_extraction) { - # Look for the date in merged_tif directory - source_file_pattern <- sprintf("%s\\.tif$", date_str) - files_for_date <- list.files(planet_tif_folder, pattern = source_file_pattern) - - if (length(files_for_date) > 0) { - dates_with_source <- c(dates_with_source, date_str) - } else { - dates_missing_source <- c(dates_missing_source, date_str) - } - } - - cat(sprintf("[SOURCE CHECK] Dates with available source data: %d\n", length(dates_with_source))) - cat(sprintf("[SOURCE CHECK] Dates missing from source (will skip): %d\n", length(dates_missing_source))) - - if (length(dates_missing_source) > 0) { - log_message(paste("⚠ Skipping extraction for missing source dates:", paste(dates_missing_source, collapse = ", "))) - } - } - - # 5. Find and filter raster files by date - with grid size detection - # ----------------------------------- - log_message("Searching for raster files") - - # Check if tiles exist (Script 10 output) - detect grid size dynamically using centralized paths - tiles_split_base <- paths$daily_tiles_split_dir - - # Detect grid size from daily_tiles_split folder structure - # Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc. 
- grid_size <- NA - if (dir.exists(tiles_split_base)) { - subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) - # Look for grid size patterns like "5x5", "10x10", "20x20" - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - grid_size <- grid_patterns[1] # Use first grid size found - log_message(paste("Detected grid size:", grid_size)) - } - } - - # Construct tile folder path with grid size - if (!is.na(grid_size)) { - tile_folder <- file.path(tiles_split_base, grid_size) - } else { - tile_folder <- tiles_split_base - } - - use_tiles <- dir.exists(tile_folder) - - # Make grid_size available globally for other functions - assign("grid_size", grid_size, envir = .GlobalEnv) - - tryCatch({ - if (skip_extraction) { - log_message("\n===== SKIPPING CI EXTRACTION (all dates already processed) =====") - } else if (use_tiles) { - # Use tile-based processing - log_message(paste("Tile folder detected at", tile_folder)) - log_message("Using tile-based CI extraction") - - # Call the tile-based extraction function - process_ci_values_from_tiles( - dates = dates, - tile_folder = tile_folder, - field_boundaries = field_boundaries, - field_boundaries_sf = field_boundaries_sf, - daily_CI_vals_dir = daily_CI_vals_dir, - cumulative_CI_vals_dir = cumulative_CI_vals_dir, - merged_final_dir = merged_final, - grid_size = grid_size - ) - - } else { - # Use legacy full-extent processing - log_message("No tiles found. 
Using legacy full-extent approach") - - # Use the existing utility function to find satellite images - existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter) - log_message(paste("Found", length(existing_files), "raster files for processing")) - - # Process raster files and create VRT - vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt) - - # Process and combine CI values - process_ci_values(dates, field_boundaries, merged_final, - field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir) - } - - }, error = function(e) { - log_message(paste("Error in main processing:", e$message), level = "ERROR") - stop(e$message) - }) - - # 6. REBUILD combined_CI_data.rds from ALL daily extracted files - # ----------------------------------------------- - # This ensures the combined file is complete and up-to-date - # even if extraction was skipped (because dates already existed) - # NOTE: Only rebuild if new dates were successfully extracted - # If all dates were missing from source, skip this step to avoid corrupting the file - log_message("\n===== HANDLING combined_CI_data.rds =====") - - if (length(dates_with_source) == 0 && length(missing_extraction) > 0) { - # All missing dates had no source data - skip combined_CI_data.rds update - log_message("⚠ No new dates extracted (all source data missing) - skipping combined_CI_data.rds update") - } else if (skip_extraction) { - # All dates already extracted - optionally rebuild for consistency - log_message("✓ All dates already extracted - combined_CI_data.rds is up-to-date") - } else { - # New dates were extracted - rebuild combined_CI_data.rds from ALL daily files - log_message("Rebuilding combined_CI_data.rds from all daily extracted files...") - - tryCatch({ - if (!dir.exists(daily_CI_vals_dir)) { - log_message("Daily CI directory does not exist yet", level = "WARNING") - } else { - # List ALL daily CI files (not just new ones) - all_daily_files <- 
list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE) - - if (length(all_daily_files) == 0) { - log_message("No daily CI files found to combine", level = "WARNING") - } else { - log_message(paste("Combining all", length(all_daily_files), "daily CI files into combined_CI_data.rds")) - - # Load and combine ALL daily files (creates complete dataset) - combined_ci_path <- file.path(paths$cumulative_ci_vals_dir, "combined_CI_data.rds") - - combined_data <- all_daily_files %>% - purrr::map(readRDS) %>% - purrr::list_rbind() %>% - dplyr::group_by(sub_field) - - # Save the rebuilt combined data - saveRDS(combined_data, combined_ci_path) - - log_message(paste("✓ Rebuilt combined_CI_data.rds with", nrow(combined_data), "total rows")) - } - } - }, error = function(e) { - log_message(paste("⚠ Error rebuilding combined_CI_data.rds (will skip):", e$message), level = "WARNING") - log_message(" Note: This is OK - Script 30 will use growth model RDS instead", level = "WARNING") - }) - } -} - -if (sys.nframe() == 0) { - main() -} diff --git a/r_app/20_ci_extraction_per_field.R b/r_app/20_ci_extraction_per_field.R index 88313ec..63d128c 100644 --- a/r_app/20_ci_extraction_per_field.R +++ b/r_app/20_ci_extraction_per_field.R @@ -80,9 +80,18 @@ main <- function() { }) # Get list of dates to process - dates <- date_list(end_date, offset) - safe_log(sprintf("Processing dates: %s to %s (%d dates)", - dates$start_date, dates$end_date, length(dates$days_filter))) + # If in migration mode, dates_to_process is provided by the pipeline runner + if (exists("dates_to_process") && !is.null(dates_to_process)) { + # Migration mode: Use provided list of dates (process ALL available dates) + dates_filter <- sort(dates_to_process) + safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter))) + } else { + # Normal mode: Use 7-day offset window + dates <- date_list(end_date, offset) + dates_filter <- dates$days_filter + 
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)", + dates$start_date, dates$end_date, length(dates_filter))) + } safe_log(sprintf("Input directory: %s", setup$field_tiles_dir)) safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir)) @@ -123,7 +132,7 @@ main <- function() { total_error <- 0 ci_results_by_date <- list() - for (date_str in dates$days_filter) { + for (date_str in dates_filter) { # Load the merged TIFF ONCE for this date merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str)) diff --git a/r_app/21_convert_ci_rds_to_csv.R b/r_app/21_convert_ci_rds_to_csv.R index 491aa7e..78af1bb 100644 --- a/r_app/21_convert_ci_rds_to_csv.R +++ b/r_app/21_convert_ci_rds_to_csv.R @@ -7,9 +7,9 @@ # models and Python ML workflows without requiring interpolated/modeled values. # # INPUT DATA: -# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds -# - Format: RDS (wide format: fields × dates with CI values) -# - Requirement: Script 20 must have completed CI extraction +# - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds +# - Format: RDS (interpolated growth model data from Script 30) +# - Requirement: Script 30 must have completed growth model interpolation # # OUTPUT DATA: # - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ @@ -36,12 +36,12 @@ # - Data directories: extracted_ci/cumulative_vals/ # # NOTES: -# - Transformation: Wide format (fields as rows, dates as columns) → Long format -# - Time series: Preserves all CI values without interpolation +# - Data source: Uses interpolated CI data from Script 30 (growth model output) +# - Handles both wide format and long format inputs from growth model # - DOY (Day of Year): Calculated from date for seasonal analysis # - Python integration: CSV format compatible with pandas/scikit-learn workflows # - Used by: Python harvest 
detection models (harvest_date_prediction.py) -# - Optional: Run only when exporting to Python for ML model training +# - Exports complete growth curves with interpolated values for ML training # # RELATED ISSUES: # SC-112: Utilities restructuring @@ -199,39 +199,56 @@ main <- function() { ci_data_source_dir <- paths$cumulative_ci_vals_dir ci_data_output_dir <- paths$ci_for_python_dir - input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds") + # Try to load interpolated growth model data from Script 30 + input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds") output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv") # Check if input file exists if (!file.exists(input_file)) { - stop(paste("Input file not found:", input_file)) + stop(paste("Input file not found:", input_file, + "\nScript 30 (growth model) must be run before Script 21.")) } cat(sprintf("Loading: %s\n", input_file)) - # Load RDS file - ci_data_wide <- readRDS(input_file) %>% + # Load RDS file (from Script 30 - already in long format with interpolated values) + ci_data <- readRDS(input_file) %>% as_tibble() - cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide))) - cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n")) - cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", "))) + cat(sprintf(" Loaded %d rows\n", nrow(ci_data))) + cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", "))) - # Step 1: Convert from WIDE to LONG format - cat("\nStep 1: Converting from wide to long format...\n") - ci_data_long <- wide_to_long_ci_data(ci_data_wide) + # Check format and prepare for export + # If it's already in long format (from Script 30), use as-is + # Otherwise, convert from wide to long + if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) { + cat(" Detected: LONG format (from growth model)\n") + ci_data_long <- ci_data + } else { + cat(" Detected: WIDE format - converting to 
long...\n") + ci_data_long <- wide_to_long_ci_data(ci_data) + } - # Step 2: Create complete daily sequences with interpolation - cat("\nStep 2: Creating complete daily sequences with interpolation...\n") - ci_data_python <- create_interpolated_daily_sequences(ci_data_long) + # Step 1: Ensure Date column exists and is properly formatted + ci_data_long <- ci_data_long %>% + mutate( + Date = as.Date(Date) + ) - # Step 3: Validate output - cat("\nStep 3: Validating output...") - validate_conversion_output(ci_data_python) + # Step 2: If interpolated values already present, use them; otherwise create interpolated sequences + if ("value" %in% names(ci_data_long)) { + # Already has interpolated values from Script 30 + cat("\nStep 2: Using interpolated values from growth model...\n") + ci_data_python <- ci_data_long + } else { + # Create interpolated daily sequences + cat("\nStep 2: Creating complete daily sequences with interpolation...\n") + ci_data_python <- create_interpolated_daily_sequences(ci_data_long) + } # Step 4: Save to CSV - cat(sprintf("\nStep 4: Saving to CSV...\n")) - cat(sprintf(" Output: %s\n", output_file)) + cat(sprintf("\nStep 4: Saving to CSV...\\n")) + cat(sprintf(" Output: %s\\n", output_file)) write_csv(ci_data_python, output_file) cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python))) diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index 81c10a8..c3cf386 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -115,15 +115,16 @@ load_combined_ci_data <- function(daily_vals_dir) { #' @param harvesting_data Dataframe with harvesting information #' @param field_CI_data Dataframe with CI measurements #' @param season Year of the growing season +#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration. 
#' @return Dataframe with interpolated daily CI values #' -extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) { +extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) { # Filter harvesting data for the given season and field name filtered_harvesting_data <- harvesting_data %>% dplyr::filter(year == season, sub_field == field_name) if (nrow(filtered_harvesting_data) == 0) { - safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") + if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } @@ -133,7 +134,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) # Return an empty data frame if no CI data is found if (nrow(filtered_field_CI_data) == 0) { - safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") + if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } @@ -157,12 +158,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) # If CI is empty after filtering, return an empty dataframe if (nrow(CI) == 0) { - safe_log(paste0("No CI data within season dates for field: ", field_name, - " (Season: ", season, ", dates: ", - format(season_start, "%Y-%m-%d"), " to ", - format(season_end, "%Y-%m-%d"), - "). Available CI data range: ", ci_date_range), - "WARNING") + if (verbose) { + safe_log(paste0("No CI data within season dates for field: ", field_name, + " (Season: ", season, ", dates: ", + format(season_start, "%Y-%m-%d"), " to ", + format(season_end, "%Y-%m-%d"), + "). 
Available CI data range: ", ci_date_range), + "WARNING") + } return(data.frame()) } @@ -175,20 +178,17 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) subField = field_name ) - # Log successful interpolation - safe_log(paste0("Successfully interpolated CI data for field: ", field_name, - " (Season: ", season, ", dates: ", - format(season_start, "%Y-%m-%d"), " to ", - format(season_end, "%Y-%m-%d"), - "). ", nrow(CI), " data points created.")) - + # Return data with success status return(CI) }, error = function(e) { - safe_log(paste0("Error interpolating CI data for field ", field_name, - " in season ", season, - " (", format(season_start, "%Y-%m-%d"), " to ", - format(season_end, "%Y-%m-%d"), - "): ", e$message), "ERROR") + # Return empty dataframe on error (will be tracked separately) + if (verbose) { + safe_log(paste0("Error interpolating CI data for field ", field_name, + " in season ", season, + " (", format(season_start, "%Y-%m-%d"), " to ", + format(season_end, "%Y-%m-%d"), + "): ", e$message), "ERROR") + } return(data.frame()) }) } @@ -203,17 +203,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { safe_log("Starting CI data interpolation for all fields") + # Track failed fields for end-of-run summary + failed_fields <- list() + total_fields <- 0 + successful_fields <- 0 + # Process each year result <- purrr::map_df(years, function(yr) { - safe_log(paste("Processing year:", yr)) - # Get the fields harvested in this year with valid season start dates sub_fields <- harvesting_data %>% dplyr::filter(year == yr, !is.na(season_start)) %>% dplyr::pull(sub_field) if (length(sub_fields) == 0) { - safe_log(paste("No fields with valid season data for year:", yr), "WARNING") return(data.frame()) } @@ -222,24 +224,64 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { purrr::keep(~ 
any(ci_data$sub_field == .x)) if (length(valid_sub_fields) == 0) { - safe_log(paste("No fields with CI data for year:", yr), "WARNING") return(data.frame()) } - # Extract and interpolate data for each valid field - safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr)) + # Initialize progress bar for this year + total_fields <<- total_fields + length(valid_sub_fields) + pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50) + counter <- 0 - result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x, - harvesting_data = harvesting_data, - field_CI_data = ci_data, - season = yr)) %>% - purrr::list_rbind() + # Extract and interpolate data for each valid field with progress bar + result_list <- list() + for (field in valid_sub_fields) { + counter <- counter + 1 + setTxtProgressBar(pb, counter) + + # Call with verbose=FALSE to suppress warnings during progress bar iteration + field_result <- extract_CI_data(field, + harvesting_data = harvesting_data, + field_CI_data = ci_data, + season = yr, + verbose = FALSE) + + if (nrow(field_result) > 0) { + successful_fields <<- successful_fields + 1 + result_list[[field]] <- field_result + } else { + # Track failed field + failed_fields[[length(failed_fields) + 1]] <<- list( + field = field, + season = yr, + reason = "Unable to generate interpolated data" + ) + } + } + close(pb) + cat("\n") # Newline after progress bar - safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr)) - return(result) + # Combine all results for this year + if (length(result_list) > 0) { + purrr::list_rbind(result_list) + } else { + data.frame() + } }) - safe_log(paste("Total interpolated data points:", nrow(result))) + # Print summary + safe_log(sprintf("\n=== Interpolation Summary ===")) + safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields)) + + if (length(failed_fields) > 0) { + safe_log(sprintf("Failed to interpolate: %d 
fields", length(failed_fields))) + for (failure in failed_fields) { + safe_log(sprintf(" - Field %s (Season %d): %s", + failure$field, failure$season, failure$reason), "WARNING") + } + } + + safe_log(sprintf("Total interpolated data points: %d", nrow(result))) + return(result) } diff --git a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R deleted file mode 100644 index cdf269e..0000000 --- a/r_app/40_mosaic_creation.R +++ /dev/null @@ -1,296 +0,0 @@ -# ============================================================================ -# SCRIPT 40: Weekly Mosaic Creation (CI Band Aggregation) -# ============================================================================ -# PURPOSE: -# Create weekly 5-band (R, G, B, NIR, CI) mosaics from daily satellite -# imagery. Aggregates multi-day CI data into single weekly composite raster -# for field-level analysis. Supports per-field or single-file architectures. -# -# INPUT DATA: -# - Daily per-field TIFFs: laravel_app/storage/app/{project}/daily_tiles/{YYYY-MM-DD}/*.tif -# (or single-file mosaics: merged_tif/{YYYY-MM-DD}.tif + pivot.geojson masking) -# - CI data (RDS): laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds -# - Field boundaries: laravel_app/storage/app/{project}/pivot.geojson -# -# OUTPUT DATA: -# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/ -# - Format: 5-band GeoTIFF (uint16) -# - Naming: week_{WW}.tif (week number + year, e.g., week_35_2025.tif) -# - Spatial: Raster aligned to field boundaries; CRS preserved -# -# USAGE: -# Rscript 40_mosaic_creation.R [end_date] [offset] [project] [file_name] [data_source] -# -# Example (Windows PowerShell): -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 aura -# -# PARAMETERS: -# - end_date: End date (YYYY-MM-DD format); required for weekly aggregation -# - offset: Days to look back (typically 7 for one week) -# - project: Project name (aura, angata, chemba, xinavane, esa, simba) -# - 
file_name: Custom output filename (optional; default: week_{WW}_{YYYY}.tif) -# - data_source: Data folder (optional; auto-detects merged_tif or merged_tif_8b) -# -# CLIENT TYPES: -# - cane_supply (ANGATA): Yes - harvest readiness timeline depends on weekly mosaic -# - agronomic_support (AURA): Yes - KPI calculation requires weekly CI bands -# -# DEPENDENCIES: -# - Packages: sf, terra, tidyverse, lubridate, here -# - Utils files: parameters_project.R, 00_common_utils.R, 40_mosaic_creation_utils.R -# - Input data: Daily per-field TIFFs (Script 10) + CI extraction (Script 20) -# - Data: field boundaries (pivot.geojson), harvest dates (if available) -# -# NOTES: -# - Weekly aggregation: Combines 7 days of daily data into single composite -# - 5-band output: R, G, B, NIR, and Canopy Index (CI) derived from NDVI -# - Tiling support: Handles per-field TIFF architecture; auto-mosaics if needed -# - Data source auto-detection: Searches merged_tif/ or merged_tif_8b/ folders -# - Command-line driven: Designed for batch scheduling (cron/Task Scheduler) -# - Downstream: Script 80 (KPI calculation) depends on weekly_mosaic/ output -# - Performance: Multi-file mosaicing (~25 fields) takes 5-10 minutes per week -# -# RELATED ISSUES: -# SC-113: Script header standardization -# SC-112: Utilities restructuring -# SC-111: Script 10 geometry validation -# -# ============================================================================ - -# 1. Load required packages -# ----------------------- -suppressPackageStartupMessages({ - # File path handling - library(here) # For relative path resolution (platform-independent file paths) - - # Spatial data handling - library(sf) # For spatial operations (field boundary masking) - library(terra) # For raster operations (reading/writing/stacking GeoTIFFs) - - # Data manipulation - library(tidyverse) # For dplyr, readr (data wrangling) - library(lubridate) # For date/time operations (week extraction, date formatting) -}) - -# 2. 
Process command line arguments and run mosaic creation -# ------------------------------------------------------ -main <- function() { - # Capture command line arguments - args <- commandArgs(trailingOnly = TRUE) - - # Process project_dir argument with default - if (length(args) >= 3 && !is.na(args[3])) { - project_dir <- as.character(args[3]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - # Default project directory - project_dir <- "angata" - message("No project_dir provided. Using default:", project_dir) - } - - # Make project_dir available globally so parameters_project.R can use it - assign("project_dir", project_dir, envir = .GlobalEnv) - - # Process end_date argument with default - if (length(args) >= 1 && !is.na(args[1])) { - # Parse date explicitly in YYYY-MM-DD format from command line - end_date <- as.Date(args[1], format = "%Y-%m-%d") - if (is.na(end_date)) { - message("Invalid end_date provided. Using current date.") - end_date <- Sys.Date() - } - } else if (exists("end_date_str", envir = .GlobalEnv)) { - end_date <- as.Date(get("end_date_str", envir = .GlobalEnv)) - } else { - # Default to current date if no argument is provided - end_date <- Sys.Date() - message("No end_date provided. Using current date: ", format(end_date)) - } - - # Process offset argument with default - if (length(args) >= 2 && !is.na(args[2])) { - offset <- as.numeric(args[2]) - if (is.na(offset) || offset <= 0) { - message("Invalid offset provided. Using default (7 days).") - offset <- 7 - } - } else { - # Default to 7 days if no argument is provided - offset <- 7 - message("No offset provided. 
Using default:", offset, "days") - } - - # Process data_source argument (optional, passed from pipeline) - # If provided, use it; otherwise auto-detect - data_source_from_args <- NULL - if (length(args) >= 5 && !is.na(args[5]) && nchar(args[5]) > 0) { - data_source_from_args <- as.character(args[5]) - message("Data source explicitly provided via arguments: ", data_source_from_args) - } - - # 3. Initialize project configuration - # -------------------------------- - - # Detect which data source directory exists (merged_tif or merged_tif_8b) - # IMPORTANT: Only consider a folder as valid if it contains actual files - laravel_storage <- here::here("laravel_app/storage/app", project_dir) - - # Load centralized path structure - tryCatch({ - source("r_app/parameters_project.R") - paths <- setup_project_directories(project_dir) - }, error = function(e) { - message("Note: Could not open files from r_app directory") - message("Attempting to source from default directory instead...") - tryCatch({ - source("parameters_project.R") - paths <- setup_project_directories(project_dir) - message("✓ Successfully sourced files from default directory") - }, error = function(e) { - stop("Failed to source required files from both 'r_app' and default directories.") - }) - }) - data_source <- if (has_8b_data) { - message("Auto-detected data source: merged_tif_8b (8-band optimized) - contains files") - "merged_tif_8b" - } else if (has_legacy_data) { - message("Auto-detected data source: merged_tif (legacy 4-band) - contains files") - "merged_tif" - } else { - message("Warning: No valid data source found (both folders empty or missing). 
Using default: merged_tif") - "merged_tif" - } - } - - # Set global data_source for parameters_project.R - assign("data_source", data_source, envir = .GlobalEnv) - - tryCatch({ - source("r_app/parameters_project.R") - source("r_app/00_common_utils.R") - source("r_app/40_mosaic_creation_utils.R") - safe_log(paste("Successfully sourced files from 'r_app' directory.")) - }, error = function(e) { - message("Note: Could not open files from r_app directory") - message("Attempting to source from default directory instead...") - tryCatch({ - source("parameters_project.R") - paths <- setup_project_directories(project_dir) - message("✓ Successfully sourced files from default directory") - }, error = function(e) { - stop("Failed to source required files from both 'r_app' and default directories.") - }) - }) - - # Use centralized paths (no need to manually construct or create dirs) - merged_final <- paths$growth_model_interpolated_dir # or merged_final_tif if needed - daily_vrt <- paths$vrt_dir - - safe_log(paste("Using growth model/mosaic directory:", merged_final)) - safe_log(paste("Using daily VRT directory:", daily_vrt)) - - # 4. Generate date range for processing - # --------------------------------- - dates <- date_list(end_date, offset) - safe_log(paste("Processing data for week", dates$week, "of", dates$year)) - - # Create output filename - # Only use custom filename if explicitly provided (not empty string) - file_name_tif <- if (length(args) >= 4 && !is.na(args[4]) && nchar(args[4]) > 0) { - as.character(args[4]) - } else { - paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif") - } - - safe_log(paste("Output will be saved as:", file_name_tif)) - - # 5. 
Create weekly mosaics - route based on project tile detection - # --------------------------------------------------------------- - # The use_tile_mosaic flag is auto-detected by parameters_project.R - # based on whether tiles exist in merged_final_tif/ - - if (!exists("use_tile_mosaic")) { - # Fallback detection if flag not set (shouldn't happen) - merged_final_dir <- file.path(laravel_storage, "merged_final_tif") - tile_detection <- detect_tile_structure_from_merged_final(merged_final_dir) - use_tile_mosaic <- tile_detection$has_tiles - } - - if (use_tile_mosaic) { - # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics - # This is used for projects like Angata with large ROIs requiring spatial partitioning - # Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20) - tryCatch({ - safe_log("Starting per-tile mosaic creation (tile-based approach)...") - - # Detect grid size from merged_final_tif folder structure - # Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc. 
- merged_final_base <- file.path(laravel_storage, "merged_final_tif") - grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE) - # Look for grid size patterns like "5x5", "10x10", "20x20" - grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) - - if (length(grid_patterns) == 0) { - stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)") - } - - grid_size <- grid_patterns[1] # Use first grid size found - safe_log(paste("Detected grid size:", grid_size)) - - # Point to the grid-specific merged_final_tif directory - merged_final_with_grid <- file.path(merged_final_base, grid_size) - - # Set output directory for per-tile mosaics, organized by grid size (from centralized paths) - # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif - tile_output_base <- file.path(paths$weekly_tile_max_dir, grid_size) - # Note: no dir.create needed - paths$weekly_tile_max_dir already created by setup_project_directories() - dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE) # Create grid-size subfolder - - created_tile_files <- create_weekly_mosaic_from_tiles( - dates = dates, - merged_final_dir = merged_final_with_grid, - tile_output_dir = tile_output_base, - field_boundaries = field_boundaries - ) - - safe_log(paste("✓ Per-tile mosaic creation completed - created", - length(created_tile_files), "tile files")) - }, error = function(e) { - safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR") - traceback() - stop("Mosaic creation failed") - }) - - } else { - # SINGLE-FILE APPROACH: Create single weekly mosaic file - # This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output - tryCatch({ - safe_log("Starting single-file mosaic creation (backward-compatible approach)...") - - # Set output directory for single-file mosaics (from centralized paths) - single_file_output_dir <- paths$weekly_mosaic_dir - - created_file <- create_weekly_mosaic( - 
dates = dates, - field_boundaries = field_boundaries, - daily_vrt_dir = daily_vrt, - merged_final_dir = merged_final, - output_dir = single_file_output_dir, - file_name_tif = file_name_tif, - create_plots = FALSE - ) - - safe_log(paste("✓ Single-file mosaic creation completed:", created_file)) - }, error = function(e) { - safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR") - traceback() - stop("Mosaic creation failed") - }) - } -} - -if (sys.nframe() == 0) { - main() -} - \ No newline at end of file diff --git a/r_app/40_mosaic_creation_per_field.R b/r_app/40_mosaic_creation_per_field.R index 9a16b8c..f7342d3 100644 --- a/r_app/40_mosaic_creation_per_field.R +++ b/r_app/40_mosaic_creation_per_field.R @@ -165,6 +165,13 @@ main <- function() { dates <- date_list(end_date, offset) + # Validate week calculation + message(sprintf("[INFO] Requested offset: %d days", offset)) + message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d"))) + message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d"))) + message(sprintf("[INFO] Calculating ISO week: %d", dates$week)) + message(sprintf("[INFO] Calculating ISO year: %d", dates$year)) + # ==== Create Per-Field Weekly Mosaics ==== created_files <- create_all_field_weekly_mosaics( diff --git a/r_app/40_mosaic_creation_per_field_utils.R b/r_app/40_mosaic_creation_per_field_utils.R index 821b02f..c1e787e 100644 --- a/r_app/40_mosaic_creation_per_field_utils.R +++ b/r_app/40_mosaic_creation_per_field_utils.R @@ -42,6 +42,15 @@ date_list <- function(end_date, offset) { week <- lubridate::isoweek(end_date) year <- lubridate::isoyear(end_date) + # Validate: Check that all dates in range belong to same ISO week + start_week <- lubridate::isoweek(start_date) + start_year <- lubridate::isoyear(start_date) + + if (start_week != week || start_year != year) { + safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. 
Using END date week %d/%d.", + start_week, start_year, week, year, week, year), "WARNING") + } + days_filter <- seq(from = start_date, to = end_date, by = "day") days_filter <- format(days_filter, "%Y-%m-%d") @@ -117,7 +126,6 @@ find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) { create_field_weekly_composite <- function(tiff_files, field_name) { if (length(tiff_files) == 0) { - safe_log(paste("No TIFF files for field:", field_name), "WARNING") return(NULL) } @@ -129,35 +137,30 @@ create_field_weekly_composite <- function(tiff_files, field_name) { r <- terra::rast(file) rasters[[length(rasters) + 1]] <- r }, error = function(e) { - safe_log(paste("Warning: Could not load", basename(file), "for field", field_name), "WARNING") + # Silently skip load errors (they're already counted) }) } if (length(rasters) == 0) { - safe_log(paste("Failed to load any rasters for field:", field_name), "ERROR") return(NULL) } # Create MAX composite if (length(rasters) == 1) { composite <- rasters[[1]] - safe_log(paste(" Field", field_name, "- single day (no compositing needed)")) } else { # Stack all rasters and apply MAX per pixel per band collection <- terra::sprc(rasters) composite <- terra::mosaic(collection, fun = "max") - safe_log(paste(" Field", field_name, "- MAX composite from", length(rasters), "days")) } # Ensure 5 bands with expected names if (terra::nlyr(composite) >= 5) { composite <- terra::subset(composite, 1:5) names(composite) <- c("Red", "Green", "Blue", "NIR", "CI") - } else { - safe_log(paste("Warning: Field", field_name, "has", terra::nlyr(composite), - "bands (expected 5)"), "WARNING") } + return(composite) }, error = function(e) { @@ -190,11 +193,9 @@ save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year) filename <- sprintf("week_%02d_%04d.tif", week, year) file_path <- file.path(field_output_dir, filename) - # Save raster + # Save raster (silently) terra::writeRaster(raster, file_path, overwrite = TRUE) - 
safe_log(paste(" Saved:", basename(field_output_dir), "/", filename)) - return(file_path) }, error = function(e) { @@ -229,8 +230,13 @@ create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_di created_files <- character() + # Initialize progress bar + pb <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50) + counter <- 0 + # Process each field for (field_name in names(field_tiffs)) { + counter <- counter + 1 tiff_files <- field_tiffs[[field_name]] # Create composite @@ -250,8 +256,12 @@ create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_di created_files <- c(created_files, saved_path) } } + + setTxtProgressBar(pb, counter) } + close(pb) + cat("\n") # New line after progress bar safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics")) return(created_files) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index ad74c15..cd39994 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -48,10 +48,6 @@ # - Critical dependency for Scripts 90/91 (reporting/dashboards) # - Uses Moran's I for spatial clustering detection (weed/stress patterns) # -# RELATED ISSUES: -# SC-112: Script 80 utilities restructuring (common + client-aware modules) -# SC-108: Core pipeline improvements -# SC-100: KPI definition and formula documentation # # ============================================================================ # [✓] Extract planting dates per field @@ -320,7 +316,6 @@ main <- function() { message("Output Formats:", paste(client_config$outputs, collapse = ", ")) # Use centralized paths from setup object (no need for file.path calls) - weekly_tile_max <- setup$weekly_tile_max_dir weekly_mosaic <- setup$weekly_mosaic_dir daily_vals_dir <- setup$daily_ci_vals_dir @@ -394,96 +389,66 @@ main <- function() { message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message(strrep("=", 70)) + # Set reports_dir for CANE_SUPPLY 
workflow (used by export functions) + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + # Continue with existing per-field analysis code below message("\n", strrep("-", 70)) - message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") message(strrep("-", 70)) current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53) year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming # Calculate previous week using authoritative helper (handles year boundaries correctly) - source("r_app/80_weekly_stats_utils.R") # Load helper function + # Function already loaded from 80_utils_common.R sourced earlier previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1) previous_week <- previous_info$week previous_year <- previous_info$year message(paste("Week:", current_week, "/ Year (ISO):", year)) - # Find mosaic files - support both tile-based AND single-file approaches - message("Finding mosaic files...") - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year) + # Find per-field weekly mosaics + message("Finding per-field weekly mosaics...") single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year) - # PRIORITY 1: Check for tile-based mosaics (projects with large ROI) - detected_grid_size <- NA - mosaic_dir <- NA - mosaic_mode <- NA + if (!dir.exists(weekly_mosaic)) { + stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, + "\nScript 40 (mosaic creation) must be run first.")) + } - if (dir.exists(weekly_tile_max)) { - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - detected_grid_size <- grid_patterns[1] - mosaic_dir <- file.path(weekly_tile_max, detected_grid_size) - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, 
full.names = TRUE) - - if (length(tile_files) > 0) { - message(paste(" ✓ Using tile-based approach (grid-size:", detected_grid_size, ")")) - message(paste(" Found", length(tile_files), "tiles")) - mosaic_mode <- "tiled" - } + field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + if (length(field_dirs) == 0) { + stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, + "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) + } + + # Verify we have mosaics for this week + single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year) + per_field_files <- c() + for (field in field_dirs) { + field_mosaic_dir <- file.path(weekly_mosaic, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files <- c(per_field_files, files) } } - # PRIORITY 2: Check for per-field mosaics (NEW per-field architecture) - if (is.na(mosaic_mode)) { - message(" No tiles found. 
Checking for per-field mosaics...") - # Check if weekly_mosaic has field subdirectories - if (dir.exists(weekly_mosaic)) { - field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) - field_dirs <- field_dirs[field_dirs != ""] - - if (length(field_dirs) > 0) { - # Check if any field has the week pattern we're looking for - per_field_files <- c() - for (field in field_dirs) { - field_mosaic_dir <- file.path(weekly_mosaic, field) - files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - if (length(files) > 0) { - per_field_files <- c(per_field_files, files) - } - } - - if (length(per_field_files) > 0) { - message(paste(" ✓ Using per-field mosaic approach")) - message(paste(" Found", length(per_field_files), "per-field mosaics")) - mosaic_mode <- "per-field" - mosaic_dir <- weekly_mosaic # Will be field subdirectories - } - } - } + if (length(per_field_files) == 0) { + stop(paste("ERROR: No mosaics found for week", current_week, "year", year, + "\nExpected pattern:", single_file_pattern, + "\nChecked:", weekly_mosaic)) } - # PRIORITY 3: Fall back to single-file mosaic (legacy approach) - if (is.na(mosaic_mode)) { - message(" No per-field mosaics found. 
Checking for single-file mosaic (legacy approach)...") - mosaic_dir <- weekly_mosaic - single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - - if (length(single_file) > 0) { - message(paste(" ✓ Using single-file approach")) - message(paste(" Found 1 mosaic file:", basename(single_file[1]))) - mosaic_mode <- "single-file" - } else { - stop(paste("ERROR: No mosaic files found for week", current_week, year, - "\n Checked (1) tile-based:", file.path(weekly_tile_max, "*", "week_*.tif"), - "\n Checked (2) per-field:", file.path(weekly_mosaic, "*", "week_*.tif"), - "\n Checked (3) single-file:", file.path(weekly_mosaic, "week_*.tif"))) - } - } + message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) - message(paste(" Using mosaic mode:", mosaic_mode)) + mosaic_mode <- "per-field" + mosaic_dir <- weekly_mosaic + + # Load field boundaries tryCatch({ @@ -551,44 +516,15 @@ main <- function() { ) } - # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile - # ============================================================================ - # NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends - # ============================================================================ - - # Build tile grid (needed by calculate_field_statistics) + # Build per-field configuration message("\nPreparing mosaic configuration for statistics calculation...") + message(" ✓ Using per-field mosaic architecture (1 TIFF per field)") - # For tile-based mosaics: build the grid mapping - # For single-file: create a minimal grid structure (single "tile" = entire mosaic) - if (mosaic_mode == "tiled") { - tile_grid <- build_tile_grid(mosaic_dir, current_week, year) - message(paste(" ✓ Built tile grid with", nrow(tile_grid), "tiles")) - } else { - # Single-file mode: create a minimal grid with just the single mosaic - message(" ✓ Using single-file mosaic (no tile grid needed)") - single_file_pattern <- 
sprintf("week_%02d_%d\\.tif", current_week, year) - single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - - if (length(single_file) == 0) { - stop("ERROR: Single-file mosaic not found in", mosaic_dir) - } - - # Create a minimal tile_grid structure with one "tile" representing the entire mosaic - tile_grid <- list( - mosaic_dir = mosaic_dir, - data = data.frame( - id = 0, # Single tile ID = 0 (full extent) - xmin = NA_real_, - xmax = NA_real_, - ymin = NA_real_, - ymax = NA_real_, - stringsAsFactors = FALSE - ), - mode = "single-file", - file = single_file[1] - ) - } + # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif + field_grid <- list( + mosaic_dir = mosaic_dir, + mode = "per-field" + ) message("\nUsing modular RDS-based approach for weekly statistics...") @@ -599,7 +535,7 @@ main <- function() { year = year, project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = tile_grid$mosaic_dir, + mosaic_dir = field_grid$mosaic_dir, reports_dir = reports_dir, report_date = end_date ) @@ -617,7 +553,7 @@ main <- function() { year = previous_year, project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = tile_grid$mosaic_dir, + mosaic_dir = field_grid$mosaic_dir, reports_dir = reports_dir, report_date = prev_report_date ) diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 705ed23..f588e96 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -660,85 +660,101 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, message(paste("Calculating statistics for all fields - Week", week_num, year)) - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) + # Per-field mode: look in per-field subdirectories single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year) - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) - if (length(tile_files) == 0) { 
- single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - if (length(single_file) > 0) { - message(paste(" Using single-file mosaic for week", week_num)) - tile_files <- single_file[1] - } else { - stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir)) + # Find all field subdirectories with mosaics for this week + field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + per_field_files <- list() + for (field in field_dirs) { + field_mosaic_dir <- file.path(mosaic_dir, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files[[field]] <- files[1] # Take first match for this field } } - message(paste(" Found", length(tile_files), "mosaic file(s) for week", week_num)) + if (length(per_field_files) == 0) { + stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir)) + } + + message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num)) results_list <- list() - for (tile_idx in seq_along(tile_files)) { - tile_file <- tile_files[tile_idx] + # Process each field's mosaic + for (field_idx in seq_along(per_field_files)) { + field_name <- names(per_field_files)[field_idx] + field_file <- per_field_files[[field_name]] + tryCatch({ - current_rast <- terra::rast(tile_file) + current_rast <- terra::rast(field_file) ci_band <- current_rast[["CI"]] if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { - message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) - return(NULL) + message(paste(" [SKIP] Field", field_name, "- CI band not found")) + next } - extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) - unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) + # Extract CI values for this field + field_boundary <- 
field_boundaries_sf[field_boundaries_sf$field == field_name, ] - for (field_poly_idx in unique_field_ids) { - field_id <- field_boundaries_sf$field[field_poly_idx] - ci_vals <- extracted$CI[extracted$ID == field_poly_idx] - ci_vals <- ci_vals[!is.na(ci_vals)] - - if (length(ci_vals) == 0) next - - mean_ci <- mean(ci_vals, na.rm = TRUE) - ci_std <- sd(ci_vals, na.rm = TRUE) - cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ - range_min <- min(ci_vals, na.rm = TRUE) - range_max <- max(ci_vals, na.rm = TRUE) - range_str <- sprintf("%.1f-%.1f", range_min, range_max) - ci_percentiles_str <- get_ci_percentiles(ci_vals) - - GERMINATION_CI_THRESHOLD <- 2.0 - num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE) - num_pixels_total <- length(ci_vals) - pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 - - field_rows <- extracted[extracted$ID == field_poly_idx, ] - num_total <- nrow(field_rows) - num_data <- sum(!is.na(field_rows$CI)) - pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 - cloud_cat <- if (num_data == 0) "No image available" - else if (pct_clear >= 95) "Clear view" - else "Partial coverage" - - existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) - if (length(existing_idx) > 0) next - - results_list[[length(results_list) + 1]] <- data.frame( - Field_id = field_id, - Mean_CI = round(mean_ci, 2), - CV = round(cv * 100, 2), - CI_range = range_str, - CI_Percentiles = ci_percentiles_str, - Pct_pixels_CI_gte_2 = pct_pixels_gte_2, - Cloud_pct_clear = pct_clear, - Cloud_category = cloud_cat, - stringsAsFactors = FALSE - ) + if (nrow(field_boundary) == 0) { + message(paste(" [SKIP] Field", field_name, "- not in field boundaries")) + next } - message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) + extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE) + + if (nrow(extracted) == 0 || 
all(is.na(extracted$CI))) { + message(paste(" [SKIP] Field", field_name, "- no CI values found")) + next + } + + ci_vals <- extracted$CI[!is.na(extracted$CI)] + + if (length(ci_vals) == 0) { + next + } + + # Calculate statistics + mean_ci <- mean(ci_vals, na.rm = TRUE) + ci_std <- sd(ci_vals, na.rm = TRUE) + cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ + range_min <- min(ci_vals, na.rm = TRUE) + range_max <- max(ci_vals, na.rm = TRUE) + range_str <- sprintf("%.1f-%.1f", range_min, range_max) + ci_percentiles_str <- get_ci_percentiles(ci_vals) + + num_pixels_total <- length(ci_vals) + num_pixels_gte_2 <- sum(ci_vals >= 2) + pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 + + num_total <- nrow(extracted) + num_data <- sum(!is.na(extracted$CI)) + pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 + cloud_cat <- if (num_data == 0) "No image available" + else if (pct_clear >= 95) "Clear view" + else "Partial coverage" + + # Add to results + results_list[[length(results_list) + 1]] <- data.frame( + Field_id = field_name, + Mean_CI = round(mean_ci, 2), + CV = round(cv * 100, 2), + CI_range = range_str, + CI_Percentiles = ci_percentiles_str, + Pct_pixels_CI_gte_2 = pct_pixels_gte_2, + Cloud_pct_clear = pct_clear, + Cloud_category = cloud_cat, + stringsAsFactors = FALSE + ) + + message(paste(" Field", field_idx, "of", length(per_field_files), "processed")) }, error = function(e) { - message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) + message(paste(" [ERROR] Field", field_name, ":", e$message)) }) } diff --git a/r_app/90_CI_report_with_kpis_simple.Rmd b/r_app/90_CI_report_with_kpis_simple.Rmd index 1fc6b89..ee251a7 100644 --- a/r_app/90_CI_report_with_kpis_simple.Rmd +++ b/r_app/90_CI_report_with_kpis_simple.Rmd @@ -3,7 +3,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: CI_report.docx report_date: "2025-09-30" - data_dir: "aura" + data_dir: 
"angata" mail_day: "Wednesday" borders: FALSE ci_plot_type: "both" # options: "absolute", "cumulative", "both" @@ -107,8 +107,9 @@ project_dir <- params$data_dir # Source project parameters with error handling tryCatch({ source(here::here("r_app", "parameters_project.R")) + source(here::here("r_app", "00_common_utils.R")) }, error = function(e) { - stop("Error loading parameters_project.R: ", e$message) + stop("Error loading project utilities: ", e$message) }) # Load centralized paths @@ -363,7 +364,7 @@ safe_log(paste("Week range:", week_start, "to", week_end)) ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} # Load CI quadrant data for field-level analysis tryCatch({ - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) + CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) safe_log("Successfully loaded CI quadrant data") }, error = function(e) { stop("Error loading CI quadrant data: ", e$message) @@ -840,7 +841,7 @@ The following table provides a comprehensive overview of all monitored fields wi ```{r detailed_field_table, echo=FALSE, results='asis'} # Load CI quadrant data to get field ages -CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) +CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) # Identify the current season for each field based on report_date # The current season is the one where the report_date falls within or shortly after the season diff --git a/r_app/91_CI_report_with_kpis_Angata.Rmd b/r_app/91_CI_report_with_kpis_Angata.Rmd index fff958b..9aac76f 100644 --- a/r_app/91_CI_report_with_kpis_Angata.Rmd +++ b/r_app/91_CI_report_with_kpis_Angata.Rmd @@ -3,7 +3,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: CI_report.docx report_date: "2025-09-30" - data_dir: "aura" + data_dir: 
"angata" mail_day: "Wednesday" borders: FALSE ci_plot_type: "both" # options: "absolute", "cumulative", "both" @@ -110,6 +110,13 @@ tryCatch({ stop("Error loading parameters_project.R: ", e$message) }) +# Source common utilities for logging and helper functions +tryCatch({ + source(here::here("r_app", "00_common_utils.R")) +}, error = function(e) { + stop("Error loading 00_common_utils.R: ", e$message) +}) + # Load centralized paths paths <- setup_project_directories(project_dir) @@ -480,7 +487,7 @@ safe_log(paste("Week range:", week_start, "to", week_end)) ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} # Load CI index data with error handling tryCatch({ - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) + CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) safe_log("Successfully loaded CI quadrant data") }, error = function(e) { diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index d9839f2..5e6b2e9 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -301,6 +301,9 @@ load_field_boundaries <- function(data_dir) { tryCatch({ boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE) + # Filter out features with empty geometries + boundaries_sf <- boundaries_sf[!st_is_empty(boundaries_sf), ] + # Repair geometries if needed if (!all(sf::st_is_valid(boundaries_sf))) { boundaries_sf <- sf::st_make_valid(boundaries_sf) diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index 21e3f78..65d162f 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -5,520 +5,302 @@ # 1. Python: Download Planet images # 2. R 10: Create master grid and split TIFFs # 3. R 20: CI Extraction -# 4. R 21: Convert CI RDS to CSV -# 5. R 30: Interpolate growth model +# 4. R 30: Interpolate growth model +# 5. R 21: Convert CI RDS to CSV (uses Script 30 output) # 6. 
Python 31: Harvest imminent weekly # 7. R 40: Mosaic creation # 8. R 80: Calculate KPIs +# 9. R 90 (Agronomic) OR R 91 (Cane Supply): Generate Word Report # # ============================================================================== # HOW TO RUN THIS SCRIPT # ============================================================================== -# +# # Run from the smartcane/ directory: -# +# # Option 1 (Recommended - shows real-time output): # Rscript r_app/run_full_pipeline.R -# +# # Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces): # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R -# +# # Option 3 (Batch mode - output saved to .Rout file): # R CMD BATCH --vanilla r_app/run_full_pipeline.R -# +# # ============================================================================== # ============================================================================== # *** EDIT THESE VARIABLES *** -end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date() -project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba" -data_source <- "merged_tif" # Standard data source directory -force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist +end_date <- as.Date("2026-02-04") # or specify: as.Date("2026-01-27") , Sys.Date() +offset <- 7 # days to look back +project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba" +force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist +migrate_legacy_format <- TRUE # Set to TRUE to migrate from old format (merged_tif/merged_tif_8b) to new format (field_tiles) +# *** NOTE: data_source is now unified - all projects use field_tiles after migration *** # *************************** -# Define Rscript path for running external R scripts via system() -RSCRIPT_PATH <- file.path("C:", "Program Files", "R", "R-4.4.3", "bin", "x64", "Rscript.exe") - -# Load client type mapping and centralized paths 
from parameters_project.R -source("r_app/parameters_project.R") -source("r_app/00_common_utils.R") -paths <- setup_project_directories(project_dir) -client_type <- get_client_type(project_dir) -cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type)) - -# ============================================================================== -# DETERMINE REPORTING WINDOW (auto-calculated based on KPI requirements) -# ============================================================================== -# Script 80 (KPIs) needs N weeks of historical data for trend analysis and reporting -# We calculate this automatically based on client type -reporting_weeks_needed <- 8 # CRITICAL: Need 8 weeks for 8-week trend analysis (Script 80 requirement) -offset <- reporting_weeks_needed * 7 # Convert weeks to days (8 weeks = 56 days) - -cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset)) -wwy_current <- get_iso_week_year(end_date) -cat(sprintf(" Running week: %02d / %d\n", wwy_current$week, wwy_current$year)) -cat(sprintf(" Date range: %s to %s\n", format(end_date - offset, "%Y-%m-%d"), format(end_date, "%Y-%m-%d"))) - - # Format dates end_date_str <- format(as.Date(end_date), "%Y-%m-%d") # Track success of pipeline pipeline_success <- TRUE -# ============================================================================== -# EARLY PREREQ CHECK: Verify mosaic requirements BEFORE any downloads -# ============================================================================== -# This determines if we need more weeks of data than the initial reporting window -# Run this BEFORE downloads so we can download ONLY missing dates upfront -cat("\n========== EARLY CHECK: MOSAIC REQUIREMENTS FOR REPORTING WINDOW ==========\n") - -# Detect mosaic mode early (centralized function in parameters_project.R) -mosaic_mode <- detect_mosaic_mode(project_dir) - -# Check what mosaics we NEED -weeks_needed <- data.frame() -for (weeks_back in 
0:(reporting_weeks_needed - 1)) { - check_date <- end_date - (weeks_back * 7) - wwy <- get_iso_week_year(check_date) - weeks_needed <- rbind(weeks_needed, data.frame(week = wwy$week, year = wwy$year, date = check_date)) -} - -missing_weeks_dates <- c() # Will store the earliest date of missing weeks -earliest_missing_date <- end_date # Start with end_date, go back if needed -missing_weeks <- data.frame() # Track ALL missing weeks for later processing by Script 40 - -for (i in 1:nrow(weeks_needed)) { - week_num <- weeks_needed[i, "week"] - year_num <- weeks_needed[i, "year"] - check_date <- weeks_needed[i, "date"] - - # Pattern must be flexible to match both: - # - Single-file: week_51_2025.tif (top-level) - # - Single-file per-field: week_51_2025.tif (in {FIELD}/ subdirectories) - # - Tiled: week_51_2025_01.tif, week_51_2025_02.tif, etc. - week_pattern_check <- sprintf("week_%02d_%d", week_num, year_num) - files_this_week <- c() - - if (mosaic_mode == "tiled") { - mosaic_dir_check <- get_mosaic_dir(project_dir, mosaic_mode = "tiled") - if (dir.exists(mosaic_dir_check)) { - # NEW: Support per-field architecture - search recursively for mosaics in field subdirectories - files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check, recursive = TRUE, full.names = FALSE) - } - } else if (mosaic_mode == "single-file") { - mosaic_dir_check <- paths$weekly_mosaic_dir - if (dir.exists(mosaic_dir_check)) { - # NEW: Support per-field architecture - search recursively for mosaics in field subdirectories - # Check both top-level (legacy) and field subdirectories (per-field architecture) - files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check, recursive = TRUE, full.names = FALSE) - } - } - - cat(sprintf( - " Week %02d/%d (%s): %s\n", week_num, year_num, format(check_date, "%Y-%m-%d"), - if (length(files_this_week) > 0) "✓ EXISTS" else "✗ MISSING" - )) - - # If week is missing, track its date range for downloading/processing - if 
(length(files_this_week) == 0) { - week_start <- check_date - 6 # Monday of that week - if (week_start < earliest_missing_date) { - earliest_missing_date <- week_start - } - # Add to missing_weeks dataframe - Script 40 will process these - missing_weeks <- rbind(missing_weeks, data.frame(week = week_num, year = year_num, week_end_date = check_date)) - } -} - -# Calculate dynamic offset for preprocessing: only process from earliest missing week to end_date -if (earliest_missing_date < end_date) { - cat(sprintf("\n[INFO] Missing week(s) detected - need to fill from %s onwards\n", format(earliest_missing_date, "%Y-%m-%d"))) - - # Adjust offset to cover only the gap (from earliest missing week to end_date) - dynamic_offset <- as.numeric(end_date - earliest_missing_date) - cat(sprintf( - "[INFO] Will download/process ONLY missing dates: %d days (from %s to %s)\n", - dynamic_offset, format(earliest_missing_date, "%Y-%m-%d"), format(end_date, "%Y-%m-%d") - )) - - # Use dynamic offset for data generation scripts (10, 20, 30, 40) - # But Script 80 still uses full reporting_weeks_needed offset for KPI calculations - data_generation_offset <- dynamic_offset - force_data_generation <- TRUE -} else { - cat("\n[INFO] ✓ All required mosaics exist - using normal reporting window\n") - data_generation_offset <- offset # Use default reporting window offset - force_data_generation <- FALSE -} - -# ============================================================================== -# CHECK KPI REQUIREMENTS FOR REPORTING WINDOW -# ============================================================================== -# Scripts 90 (Word report) and 91 (Excel report) require KPIs for full reporting window -# Script 80 ALWAYS runs and will CALCULATE missing KPIs, so this is just for visibility -# Uses centralized check_kpi_completeness() function from parameters_project.R -cat("\n========== KPI REQUIREMENT CHECK ==========\n") -cat(sprintf( - "KPIs needed for reporting: %d weeks (current week + %d 
weeks history)\n", - reporting_weeks_needed, reporting_weeks_needed - 1 -)) - -# Check KPI completeness (replaces duplicate logic from lines ~228-270 and ~786-810) -kpi_check <- check_kpi_completeness(project_dir, client_type, end_date, reporting_weeks_needed) -kpi_dir <- kpi_check$kpi_dir -kpis_needed <- kpi_check$kpis_df -kpis_missing_count <- kpi_check$missing_count - -# Create KPI directory if it doesn't exist -if (!dir.exists(kpi_dir)) { - dir.create(kpi_dir, recursive = TRUE, showWarnings = FALSE) -} - -# Display status for each week -if (nrow(kpis_needed) > 0) { - for (i in 1:nrow(kpis_needed)) { - row <- kpis_needed[i, ] - cat(sprintf( - " Week %02d/%d (%s): %s (%d files)\n", - row$week, row$year, format(row$date, "%Y-%m-%d"), - if (row$has_kpis) "✓ EXISTS" else "✗ WILL BE CALCULATED", - row$file_count - )) - } -} else { - cat(" (No weeks in reporting window)\n") -} - -cat(sprintf( - "\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n", - nrow(kpis_needed) - kpis_missing_count, nrow(kpis_needed), kpis_missing_count -)) - -# Define conditional script execution based on client type -# Client types: -# - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output) -# - "agronomic_support": Runs Scripts 20,30,80,90 only (KPI calculation + Word report) -# -# Scripts that ALWAYS run (regardless of client type): -# - 00: Python Download -# - 10: Tiling (if outputs don't exist) -# - 20: CI Extraction -# - 30: Growth Model -# - 40: Mosaic Creation -# - 80: KPI Calculation -# -# Scripts that are client-type specific: -# - 21: CI RDS→CSV (cane_supply only) -# - 22: (cane_supply only) -# - 23: (cane_supply only) -# - 31: Harvest Imminent (cane_supply only) -# - 90: Legacy Word Report (agronomic_support only) -# - 91: Modern Excel Report (cane_supply only) -skip_cane_supply_only <- (client_type != "cane_supply") # Skip Scripts 21,22,23,31 for non-cane_supply -run_legacy_report <- (client_type == "agronomic_support") # 
Script 90 for agronomic support -run_modern_report <- (client_type == "cane_supply") # Script 91 for cane supply - # ============================================================================== # INTELLIGENT CHECKING: What has already been completed? # ============================================================================== cat("\n========== CHECKING EXISTING OUTPUTS ==========\n") -# Use centralized mosaic mode detection from parameters_project.R -cat(sprintf("Auto-detected mosaic mode: %s\n", mosaic_mode)) - -# Check Script 10 outputs - FLEXIBLE: look for tiles either directly OR in grid subdirs -tiles_split_base <- paths$daily_tiles_split_dir +# Check Script 10 outputs (field_tiles with per-field TIFFs) +# Script 10 outputs to field_tiles/{field_id}/{date}.tif +field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") tiles_dates <- c() -if (dir.exists(tiles_split_base)) { - # Try grid-size subdirectories first (5x5, 10x10, etc.) - preferred new structure - subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - - if (length(grid_patterns) > 0) { - # New structure: daily_tiles_split/{grid_size}/{dates}/ - grid_dir <- file.path(tiles_split_base, grid_patterns[1]) - tiles_dates <- list.dirs(grid_dir, full.names = FALSE, recursive = FALSE) - } else { - # Old structure: daily_tiles_split/{dates}/ (no grid-size subfolder) - tiles_dates <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) +if (dir.exists(field_tiles_dir)) { + # Get all field subdirectories + field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs) > 0) { + # Get unique dates from all field directories + all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + tiles_dates <- unique(sub("\\.tif$", "", all_files)) } } -cat(sprintf("Script 10: %d dates already tiled\n", 
length(tiles_dates))) +cat(sprintf("Script 10: %d dates already tiled (field_tiles/)\n", length(tiles_dates))) -# Check Script 20 outputs (CI extraction) - daily RDS files -ci_daily_dir <- paths$daily_ci_vals_dir -ci_files <- if (dir.exists(ci_daily_dir)) { - list.files(ci_daily_dir, pattern = "\\.rds$") -} else { - c() +# Check Script 20 outputs (CI extraction) - per-field CI TIFFs at field_tiles_CI/{FIELD}/{DATE}.tif +# NOTE: This is the NEW per-field format, not the old extracted_ci/ flat format +field_tiles_ci_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI") +ci_tiff_dates <- c() +if (dir.exists(field_tiles_ci_dir)) { + # Get all field subdirectories + field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs) > 0) { + # Get unique dates from all field directories (dates that have been processed through Script 20) + all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + ci_tiff_dates <- unique(sub("\\.tif$", "", all_files)) + } } -cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files))) +cat(sprintf("Script 20: %d dates already processed (field_tiles_CI/)\n", length(ci_tiff_dates))) # Check Script 21 outputs (CSV conversion) - note: this gets overwritten each time, so we don't skip based on this # Instead, check if CI RDS files exist - if they do, 21 should also run # For now, just note that CSV is time-dependent, not a good skip indicator cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n") -# Check Script 40 outputs (mosaics) - check which weeks are missing (not just current week) -# The early check section already identified missing_weeks, so we use that -skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Only skip if NO missing weeks AND not forcing rerun -cat(sprintf("Script 40: %d missing week(s) to create\n", nrow(missing_weeks))) +# Check Script 40 outputs (mosaics in weekly_tile_max/5x5) 
+mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") +mosaic_files <- if (dir.exists(mosaic_dir)) { + list.files(mosaic_dir, pattern = "\\.tif$") +} else { + c() +} +cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files))) -# Check Script 80 outputs (KPIs in reports/kpis/{field_level|field_analysis}) -# kpi_dir already set by check_kpi_completeness() above -# Script 80 exports to .xlsx (Excel) and .rds (RDS) formats +# Check Script 80 outputs (KPIs in reports/kpis/field_stats) +kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") kpi_files <- if (dir.exists(kpi_dir)) { - list.files(kpi_dir, pattern = "\\.xlsx$|\\.rds$") + list.files(kpi_dir, pattern = "\\.csv$|\\.json$") } else { c() } cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files))) -# Determine if scripts should run based on outputs AND client type -skip_10 <- (length(tiles_dates) > 0 && !force_rerun && !force_data_generation) # Force Script 10 if missing weeks detected -skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data -skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion) -skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients -skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients -skip_30 <- FALSE # Script 30 ALWAYS runs for all client types -skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients -skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Skip Script 40 only if NO missing weeks -skip_80 <- (kpis_missing_count == 0 && !force_rerun) # Skip Script 80 only if ALL KPIs exist AND not forcing rerun +# Determine if scripts should run based on outputs +skip_10 <- FALSE # Script 10 should always run to pick up any new merged_tif files +skip_20 <- FALSE # Script 20 always runs to process dates in the current window (per-field 
format) +skip_21 <- FALSE # Skip 21 only if 20 is skipped +skip_40 <- length(mosaic_files) > 0 && !force_rerun +skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks -cat("\nSkipping decisions (based on outputs AND client type):\n") -cat(sprintf(" Script 10: %s\n", if (skip_10) "SKIP" else "RUN")) -cat(sprintf(" Script 20: RUN (always runs to process new downloads)\n")) -cat(sprintf(" Script 21: %s %s\n", if (skip_21) "SKIP" else "RUN", if (skip_cane_supply_only && !skip_21) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 22: %s %s\n", if (skip_22) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 23: %s %s\n", if (skip_23) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 30: %s (always runs)\n", if (skip_30) "SKIP" else "RUN")) -cat(sprintf(" Script 31: %s %s\n", if (skip_31) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 40: %s (looping through %d missing weeks)\n", if (skip_40) "SKIP" else "RUN", nrow(missing_weeks))) -cat(sprintf(" Script 80: %s (always runs)\n", if (skip_80) "SKIP" else "RUN")) -cat(sprintf(" Script 90: %s %s\n", if (!run_legacy_report) "SKIP" else "RUN", if (run_legacy_report) "(agronomic_support legacy report)" else "")) -cat(sprintf(" Script 91: %s %s\n", if (!run_modern_report) "SKIP" else "RUN", if (run_modern_report) "(cane_supply modern report)" else "")) +cat("\nSkipping decisions:\n") +cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP (tiles exist)" else "RUN")) +cat(sprintf(" Script 20: %s\n", if(skip_20) "SKIP (CI exists)" else "RUN")) +cat(sprintf(" Script 21: %s\n", if(skip_21) "SKIP (CI exists)" else "RUN")) +cat(sprintf(" Script 40: %s\n", if(skip_40) "SKIP (mosaics exist)" else "RUN")) +cat(sprintf(" Script 80: %s\n", if(skip_80) "SKIP (KPIs exist)" else "RUN")) # 
============================================================================== # PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY) # ============================================================================== cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n") -tryCatch( - { - # Setup paths - # NOTE: All downloads go to merged_tif/ regardless of project - # (data_source variable is used later by Script 20 for reading, but downloads always go to merged_tif) - merged_tifs_dir <- paths$merged_tif_folder # Always check merged_tif for downloads - - cat(sprintf("[DEBUG] Checking for existing files in: %s\n", merged_tifs_dir)) - cat(sprintf("[DEBUG] Directory exists: %s\n", dir.exists(merged_tifs_dir))) - - # Get existing dates from raw TIFFs in merged_tif/ - existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") - existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) - - cat(sprintf("[DEBUG] Found %d existing TIFF files\n", length(existing_tiff_files))) - if (length(existing_tiff_files) > 0) { - cat(sprintf("[DEBUG] Sample files: %s\n", paste(head(existing_tiff_files, 3), collapse=", "))) - } - - # Find missing dates in the window - start_date <- end_date - data_generation_offset - date_seq <- seq(start_date, end_date, by = "day") - target_dates <- format(date_seq, "%Y-%m-%d") - - # Get existing dates from tiles (better indicator of completion for tiled projects) - existing_tile_dates <- tiles_dates - - # CRITICAL FIX: Always use TIFF dates for checking existing files - # This is the source of truth - if merged_tif/ has a file, don't re-download it - # We don't download again if the file exists, regardless of whether tiles have been created yet - if (length(existing_tiff_dates) > 0) { - cat(sprintf("[DEBUG] Using TIFF dates for existence check (found %d existing files)\n", length(existing_tiff_dates))) - # IMPORTANT: Only consider existing TIFF dates that fall within our target window - # 
This prevents old 2025 data from masking missing 2026 data - existing_tile_dates <- existing_tiff_dates[existing_tiff_dates %in% target_dates] - } - - # Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file) - missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)] - - if (mosaic_mode == "single-file") { - cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates))) - } else { - cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) - } - cat(sprintf(" Missing dates in window: %d\n", length(missing_dates))) - - # Download each missing date - download_count <- 0 - download_failed <- 0 - - if (length(missing_dates) > 0) { - # Save current directory - original_dir <- getwd() - - # Change to python_app directory so relative paths work correctly - setwd("python_app") - - for (date_str in missing_dates) { - cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str) - result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE) - if (result == 0) { - download_count <- download_count + 1 - } else { - download_failed <- download_failed + 1 - } - } - - # Change back to original directory - setwd(original_dir) - } - - cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed)) - if (download_failed > 0) { - cat("⚠ Some downloads failed, but continuing pipeline\n") - } - - # Force Script 10 to run ONLY if downloads actually succeeded (not just attempted) - if (download_count > 0) { - skip_10 <- FALSE - } - }, - error = function(e) { - cat("✗ Error in planet download:", e$message, "\n") - pipeline_success <<- FALSE +tryCatch({ + # Setup paths + base_path <- file.path("laravel_app", "storage", "app", project_dir) + + # Always check merged_tif/ for existing downloads (both modes) + # merged_tif/ is where Python downloads go, before Script 10 splits to field_tiles/ + merged_tifs_dir <- file.path(base_path, 
"merged_tif") + existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) + + if (migrate_legacy_format) { + cat(sprintf(" Migration mode: Checking merged_tif/ for existing dates\n")) + } else { + cat(sprintf(" Production mode: Checking merged_tif/ and field_tiles/ for existing dates\n")) } -) + + # Find missing dates in the window + # Window: from (end_date - offset) to end_date + # Example: if end_date=2026-02-04 and offset=7, window is 2026-01-28 to 2026-02-04 (8 dates) + start_date <- end_date - offset + date_seq <- seq(start_date, end_date, by = "day") + target_dates <- format(date_seq, "%Y-%m-%d") + + # Also check field_tiles/ for dates that have already been processed through Script 10 + # field_tiles/ contains {field_id}/{date}.tif files - check which dates are present + field_tiles_dir <- file.path(base_path, "field_tiles") + processed_dates <- c() + if (dir.exists(field_tiles_dir)) { + # Get all field subdirectories + field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs) > 0) { + # Get unique dates from all field directories + all_files <- list.files(field_dirs, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + processed_dates <- unique(sub("\\.tif$", "", all_files)) + } + } + + # Combine existing dates from both merged_tif and field_tiles + all_existing_dates <- unique(c(existing_tiff_dates, processed_dates)) + + # Compare: which target dates don't exist in merged_tif/ or field_tiles/? 
+ missing_dates <- target_dates[!(target_dates %in% all_existing_dates)] + + cat(sprintf(" Existing dates in merged_tif/: %d\n", length(existing_tiff_dates))) + cat(sprintf(" Processed dates in field_tiles/: %d\n", length(processed_dates))) + cat(sprintf(" Target window: %s to %s (%d dates)\n", start_date, end_date, length(target_dates))) + cat(sprintf(" Missing dates to download: %d\n", length(missing_dates))) + + # Download each missing date + download_count <- 0 + download_failed <- 0 + + if (length(missing_dates) > 0) { + # Save current directory + original_dir <- getwd() + + # Change to python_app directory so relative paths work correctly + setwd("python_app") + + for (date_str in missing_dates) { + cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str) + result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE) + if (result == 0) { + download_count <- download_count + 1 + } else { + download_failed <- download_failed + 1 + } + } + + # Change back to original directory + setwd(original_dir) + } + + cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed)) + if (download_failed > 0) { + cat("⚠ Some downloads failed, but continuing pipeline\n") + } + + # Force Script 10 to run ONLY if downloads actually succeeded (not just attempted) + if (download_count > 0) { + skip_10 <- FALSE + } + +}, error = function(e) { + cat("✗ Error in planet download:", e$message, "\n") + pipeline_success <<- FALSE +}) # ============================================================================== -# SCRIPT 10: CREATE PER-FIELD TIFFs +# MIGRATION: Move legacy format files to new format (if enabled) +# ============================================================================== +if (pipeline_success && migrate_legacy_format) { + cat("\n========== MIGRATION: MOVING LEGACY FORMAT FILES ==========\n") + tryCatch({ + base_path <- file.path("laravel_app", "storage", "app", 
project_dir) + + # PART 1: Move merged_tif files to field_tiles + merged_tif_old <- file.path(base_path, "merged_tif") + field_tiles_new <- file.path(base_path, "field_tiles") + + if (dir.exists(merged_tif_old)) { + tif_files <- list.files(merged_tif_old, pattern = "\\.tif$", full.names = TRUE) + if (length(tif_files) > 0) { + dir.create(field_tiles_new, showWarnings = FALSE, recursive = TRUE) + for (file in tif_files) { + file.rename(file, file.path(field_tiles_new, basename(file))) + } + cat(sprintf("✓ Moved %d TIFF files from merged_tif/ to field_tiles/\n", length(tif_files))) + } + } + + # PART 2: Move merged_tif_final files (CI) to field_tiles_CI + merged_tif_final_old <- file.path(base_path, "merged_tif_final") + field_tiles_ci_new <- file.path(base_path, "field_tiles_CI") + + if (dir.exists(merged_tif_final_old)) { + ci_files <- list.files(merged_tif_final_old, pattern = "\\.tif$", full.names = TRUE) + if (length(ci_files) > 0) { + dir.create(field_tiles_ci_new, showWarnings = FALSE, recursive = TRUE) + for (file in ci_files) { + file.rename(file, file.path(field_tiles_ci_new, basename(file))) + } + cat(sprintf("✓ Moved %d CI TIFF files from merged_tif_final/ to field_tiles_CI/\n", length(ci_files))) + } + } + + cat("✓ Migration completed successfully\n") + }, error = function(e) { + cat("✗ Error in migration:", e$message, "\n") + pipeline_success <<- FALSE + }) +} + +# ============================================================================== +# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs # ============================================================================== if (pipeline_success && !skip_10) { - cat("\n========== RUNNING SCRIPT 10: CREATE PER-FIELD TIFFs ==========\n") - tryCatch( - { - # Run Script 10 via system() - NEW per-field version - # Arguments: project_dir - cmd <- sprintf( - '"%s" r_app/10_create_per_field_tiffs.R "%s"', - RSCRIPT_PATH, - project_dir - ) - result <- system(cmd) - - if (result != 0) { - stop("Script 10 exited with 
error code:", result) + cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n") + tryCatch({ + # Set environment variables for the script (Script 10 uses these for filtering) + assign("PROJECT", project_dir, envir = .GlobalEnv) + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + + # Count field_tiles/ dates BEFORE Script 10 runs + field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") + field_dirs_before <- c() + if (dir.exists(field_tiles_dir)) { + field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_tmp) > 0) { + all_files_before <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + field_dirs_before <- unique(sub("\\.tif$", "", all_files_before)) } - - # Verify output - check per-field structure - field_tiles_dir <- paths$field_tiles_dir - if (dir.exists(field_tiles_dir)) { - fields <- list.dirs(field_tiles_dir, full.names = FALSE, recursive = FALSE) - fields <- fields[fields != ""] - total_files <- sum(sapply(file.path(field_tiles_dir, fields), function(f) length(list.files(f, pattern = "\\.tif$")))) - cat(sprintf("✓ Script 10 completed - created per-field TIFFs (%d fields, %d files)\n", length(fields), total_files)) - } else { - cat("✓ Script 10 completed\n") - } - }, - error = function(e) { - cat("✗ Error in Script 10:", e$message, "\n") - pipeline_success <<- FALSE } - ) + + # Suppress verbose per-date output, show only summary + sink(nullfile()) + source("r_app/10_create_per_field_tiffs.R") + sink() + + # Count field_tiles/ dates AFTER Script 10 runs + field_dirs_after <- c() + if (dir.exists(field_tiles_dir)) { + field_dirs_tmp <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_tmp) > 0) { + all_files_after <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + field_dirs_after <- unique(sub("\\.tif$", "", 
all_files_after)) + } + } + + # Calculate newly added dates + newly_added <- length(field_dirs_after) - length(field_dirs_before) + cat(sprintf("✓ Script 10 completed - processed %d new dates (total: %d dates in field_tiles/)\n", max(0, newly_added), length(field_dirs_after))) + }, error = function(e) { + sink() + cat("✗ Error in Script 10:", e$message, "\n") + pipeline_success <<- FALSE + }) } else if (skip_10) { - cat("\n========== SKIPPING SCRIPT 10 (per-field TIFFs already exist) ==========\n") -} - -# ============================================================================== -# CHECK: Per-Field TIFFs Without CI Data -# ============================================================================== -# IMPORTANT: Script 10 creates per-field TIFFs for ALL dates in merged_tif/ -# But Script 20 only processes dates within the offset window. -# This check finds dates that have per-field TIFFs but NO CI data, -# and forces Script 20 to process them regardless of offset. -cat("\n========== CHECKING FOR PER-FIELD TIFFs WITHOUT CI DATA ==========\n") - -field_tiles_dir <- paths$field_tiles_dir -field_tiles_ci_dir <- paths$field_tiles_ci_dir -ci_daily_dir <- paths$daily_ci_vals_dir - -# Get all dates that have per-field TIFFs -tiff_dates_all <- c() -if (dir.exists(field_tiles_dir)) { - # Check all field subdirectories - fields <- list.dirs(field_tiles_dir, full.names = FALSE, recursive = FALSE) - fields <- fields[fields != ""] - - if (length(fields) > 0) { - for (field in fields) { - field_path <- file.path(field_tiles_dir, field) - # Get dates from TIFF filenames: YYYY-MM-DD_*.tif or similar - tiff_files <- list.files(field_path, pattern = "^\\d{4}-\\d{2}-\\d{2}.*\\.tif$") - dates_in_field <- unique(sub("_.*$", "", tiff_files)) # Extract YYYY-MM-DD - tiff_dates_all <- unique(c(tiff_dates_all, dates_in_field)) - } - } -} - -# Get all dates that have CI data (either from field_tiles_CI or extracted_ci) -ci_dates_all <- c() -if (dir.exists(field_tiles_ci_dir)) { - # 
Check all field subdirectories for CI TIFFs - fields_ci <- list.dirs(field_tiles_ci_dir, full.names = FALSE, recursive = FALSE) - fields_ci <- fields_ci[fields_ci != ""] - - if (length(fields_ci) > 0) { - for (field in fields_ci) { - field_path <- file.path(field_tiles_ci_dir, field) - ci_tiff_files <- list.files(field_path, pattern = "^\\d{4}-\\d{2}-\\d{2}.*\\.tif$") - dates_in_field <- unique(sub("_.*$", "", ci_tiff_files)) - ci_dates_all <- unique(c(ci_dates_all, dates_in_field)) - } - } -} - -# Also check extracted_ci RDS files as source of truth -if (dir.exists(ci_daily_dir)) { - fields_rds <- list.dirs(ci_daily_dir, full.names = FALSE, recursive = FALSE) - fields_rds <- fields_rds[fields_rds != ""] - - if (length(fields_rds) > 0) { - for (field in fields_rds) { - field_path <- file.path(ci_daily_dir, field) - rds_files <- list.files(field_path, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$") - dates_in_field <- sub("\\.rds$", "", rds_files) - ci_dates_all <- unique(c(ci_dates_all, dates_in_field)) - } - } -} - -# Find dates with TIFFs but no CI data -dates_missing_ci <- setdiff(tiff_dates_all, ci_dates_all) - -cat(sprintf("Total per-field TIFF dates: %d\n", length(tiff_dates_all))) -cat(sprintf("Total CI data dates: %d\n", length(ci_dates_all))) -cat(sprintf("Dates with TIFFs but NO CI: %d\n", length(dates_missing_ci))) - -# If there are per-field TIFFs without CI, force Script 20 to run with extended date range -if (length(dates_missing_ci) > 0) { - cat("\n⚠ Found per-field TIFFs without CI data - forcing Script 20 to process them\n") - cat(sprintf(" Sample missing dates: %s\n", paste(head(dates_missing_ci, 3), collapse=", "))) - - # Calculate extended date range: from earliest missing date to end_date - earliest_missing_tiff <- min(as.Date(dates_missing_ci)) - extended_offset <- as.numeric(end_date - earliest_missing_tiff) - - cat(sprintf(" Extended offset: %d days (from %s to %s)\n", - extended_offset, format(earliest_missing_tiff, "%Y-%m-%d"), format(end_date, 
"%Y-%m-%d"))) - - # Use extended offset for Script 20 - offset_for_ci <- extended_offset - skip_20 <- FALSE # Force Script 20 to run -} else { - cat("✓ All per-field TIFFs have corresponding CI data\n") - offset_for_ci <- offset # Use normal offset + cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n") } # ============================================================================== @@ -526,401 +308,322 @@ if (length(dates_missing_ci) > 0) { # ============================================================================== if (pipeline_success && !skip_20) { cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n") - tryCatch( - { - # Run Script 20 via system() to pass command-line args just like from terminal - # Arguments: project_dir end_date offset - # Use offset_for_ci which may have been extended if per-field TIFFs exist without CI - cmd <- sprintf( - '"%s" r_app/20_ci_extraction_per_field.R "%s" "%s" %d', - RSCRIPT_PATH, - project_dir, format(end_date, "%Y-%m-%d"), offset_for_ci - ) - result <- system(cmd) - - if (result != 0) { - stop("Script 20 exited with error code:", result) + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + # If in migration mode, find all dates that need processing + if (migrate_legacy_format) { + cat("Migration mode: Finding all dates in field_tiles/ that need CI processing...\n") + + # Get all dates from field_tiles/ + field_tiles_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") + all_dates_in_tiles <- c() + if (dir.exists(field_tiles_dir_check)) { + field_dirs_tmp <- list.dirs(field_tiles_dir_check, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_tmp) > 0) { + all_files_tmp <- list.files(field_dirs_tmp, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + all_dates_in_tiles <- unique(sub("\\.tif$", "", 
all_files_tmp)) + } } - - # Verify CI output was created - ci_daily_dir <- paths$daily_ci_vals_dir - if (dir.exists(ci_daily_dir)) { - files <- list.files(ci_daily_dir, pattern = "\\.rds$") - cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files))) - } else { - cat("✓ Script 20 completed\n") + + # Get dates already processed in field_tiles_CI/ + field_tiles_ci_check <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI") + processed_ci_dates <- c() + if (dir.exists(field_tiles_ci_check)) { + field_dirs_ci <- list.dirs(field_tiles_ci_check, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_ci) > 0) { + all_files_ci <- list.files(field_dirs_ci, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + processed_ci_dates <- unique(sub("\\.tif$", "", all_files_ci)) + } + } + + # Get dates already in old RDS format + old_rds_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") + processed_rds_dates <- c() + if (dir.exists(old_rds_dir)) { + rds_files <- list.files(old_rds_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$") + processed_rds_dates <- unique(sub("\\.rds$", "", rds_files)) + } + + # Find dates missing from either location + dates_missing_ci <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_ci_dates)] + dates_missing_rds <- all_dates_in_tiles[!(all_dates_in_tiles %in% processed_rds_dates)] + dates_to_process_migration <- sort(unique(c(dates_missing_ci, dates_missing_rds))) + + cat(sprintf(" All dates in field_tiles/: %d\n", length(all_dates_in_tiles))) + cat(sprintf(" Already in field_tiles_CI/: %d\n", length(processed_ci_dates))) + cat(sprintf(" Already in extracted_ci/daily_vals/: %d\n", length(processed_rds_dates))) + cat(sprintf(" Dates needing processing: %d\n", length(dates_to_process_migration))) + + if (length(dates_to_process_migration) > 0) { + assign("dates_to_process", dates_to_process_migration, envir = .GlobalEnv) + cat(sprintf(" Will process: %s to %s\n", 
dates_to_process_migration[1], dates_to_process_migration[length(dates_to_process_migration)])) } - }, - error = function(e) { - cat("✗ Error in Script 20:", e$message, "\n") - pipeline_success <<- FALSE } - ) + + source("r_app/20_ci_extraction_per_field.R") + main() + + # Verify output + field_tiles_ci_verify <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles_CI") + tiff_count <- 0 + if (dir.exists(field_tiles_ci_verify)) { + field_dirs_verify <- list.dirs(field_tiles_ci_verify, full.names = TRUE, recursive = FALSE) + if (length(field_dirs_verify) > 0) { + all_files_verify <- list.files(field_dirs_verify, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + tiff_count <- length(all_files_verify) + } + } + cat(sprintf("✓ Script 20 completed - %d CI TIFFs in field_tiles_CI/\n", tiff_count)) + }, error = function(e) { + cat("✗ Error in Script 20:", e$message, "\n") + pipeline_success <<- FALSE + }) } else if (skip_20) { cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) ==========\n") } # ============================================================================== -# SCRIPT 21: CONVERT CI RDS TO CSV +# SCRIPT 30: INTERPOLATE GROWTH MODEL +# ============================================================================== +if (pipeline_success) { + cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/30_interpolate_growth_model.R") + main() # Call main() to execute the script with the environment variables + + # Verify interpolated output + growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") + if (dir.exists(growth_dir)) { + files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$") + cat(sprintf("✓ Script 30 completed - generated %d growth model 
files\n", length(files))) + } else { + cat("✓ Script 30 completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 30:", e$message, "\n") + pipeline_success <<- FALSE + }) +} + +# ============================================================================== +# SCRIPT 21: CONVERT CI RDS TO CSV (uses Script 30 output) # ============================================================================== if (pipeline_success && !skip_21) { cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n") - tryCatch( - { - # Set environment variables for the script - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) - assign("project_dir", project_dir, envir = .GlobalEnv) - - source("r_app/21_convert_ci_rds_to_csv.R") - main() # Call main() to execute the script with the environment variables - - # Verify CSV output was created - ci_csv_path <- paths$ci_for_python_dir - if (dir.exists(ci_csv_path)) { - csv_files <- list.files(ci_csv_path, pattern = "\\.csv$") - cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files))) - } else { - cat("✓ Script 21 completed\n") - } - }, - error = function(e) { - cat("✗ Error in Script 21:", e$message, "\n") - pipeline_success <<- FALSE + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/21_convert_ci_rds_to_csv.R") + main() # Call main() to execute the script with the environment variables + + # Verify CSV output was created + ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir, "ci_extracted") + if (dir.exists(ci_csv_path)) { + csv_files <- list.files(ci_csv_path, pattern = "\\.csv$") + cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files))) + } else { + cat("✓ Script 21 completed\n") } - ) + }, error = function(e) { + 
cat("✗ Error in Script 21:", e$message, "\n") + pipeline_success <<- FALSE + }) } else if (skip_21) { cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n") } -# ============================================================================== -# SCRIPT 30: INTERPOLATE GROWTH MODEL -# ============================================================================== -if (pipeline_success && !skip_30) { - cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") - tryCatch( - { - # Run Script 30 via system() to pass command-line args just like from terminal - # Script 30 expects: project_dir only - # Per-field version reads CI data from Script 20 per-field output location - cmd <- sprintf( - '"%s" r_app/30_interpolate_growth_model.R "%s"', - RSCRIPT_PATH, - project_dir - ) - result <- system(cmd) - - if (result != 0) { - stop("Script 30 exited with error code:", result) - } - - # Verify interpolated output - Script 30 saves to cumulative_ci_vals_dir - cumulative_ci_vals_dir <- paths$cumulative_ci_vals_dir - if (dir.exists(cumulative_ci_vals_dir)) { - files <- list.files(cumulative_ci_vals_dir, pattern = "\\.rds$") - cat(sprintf("✓ Script 30 completed - generated %d interpolated RDS file(s)\n", length(files))) - } else { - cat("✓ Script 30 completed\n") - } - }, - error = function(e) { - cat("✗ Error in Script 30:", e$message, "\n") - pipeline_success <<- FALSE - } - ) -} - # ============================================================================== # PYTHON 31: HARVEST IMMINENT WEEKLY # ============================================================================== -if (pipeline_success && !skip_31) { +if (pipeline_success) { cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n") - tryCatch( - { - # Run Python script in pytorch_gpu conda environment - # Script expects positional project name (not --project flag) - # Run from smartcane root so conda can find the environment - cmd <- sprintf("conda run -n 
pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s", project_dir) - result <- system(cmd) - - if (result == 0) { - # Verify harvest output - check for THIS WEEK's specific file - wwy_current_31 <- get_iso_week_year(end_date) - harvest_exists <- check_harvest_output_exists(project_dir, wwy_current_31$week, wwy_current_31$year) - - if (harvest_exists) { - cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", wwy_current_31$week)) - } else { - cat("✓ Script 31 completed (check if harvest.xlsx is available)\n") - } + tryCatch({ + # Run Python script in pytorch_gpu conda environment + # Script expects positional project name (not --project flag) + # Run from smartcane root so conda can find the environment + cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir) + cat("DEBUG: Running command:", cmd, "\n") + result <- system(cmd) + + if (result == 0) { + # Verify harvest output - check for THIS WEEK's specific file + current_week <- as.numeric(format(end_date, "%V")) + current_year <- as.numeric(format(end_date, "%Y")) + expected_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) + + if (file.exists(expected_file)) { + cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", current_week)) } else { - cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n") + cat("✓ Script 31 completed (check if harvest.xlsx is available)\n") } - }, - error = function(e) { - setwd(original_dir) - cat("⚠ Script 31 error:", e$message, "\n") + } else { + cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n") } - ) -} else if (skip_31) { - cat("\n========== SKIPPING SCRIPT 31 (non-cane_supply client type) ==========\n") + }, error = function(e) { + setwd(original_dir) + cat("⚠ Script 31 
error:", e$message, "\n") + }) } # ============================================================================== -# SCRIPT 40: MOSAIC CREATION (LOOP THROUGH MISSING WEEKS) +# SCRIPT 40: MOSAIC CREATION # ============================================================================== if (pipeline_success && !skip_40) { cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n") - - # If there are missing weeks, process them one at a time - if (nrow(missing_weeks) > 0) { - cat(sprintf("Found %d missing week(s) - running Script 40 once per week\n\n", nrow(missing_weeks))) - - # Loop through missing weeks in reverse chronological order (oldest first) - for (week_idx in nrow(missing_weeks):1) { - missing_week <- missing_weeks[week_idx, ] - week_num <- missing_week$week - year_num <- missing_week$year - week_end_date <- as.Date(missing_week$week_end_date) - - cat(sprintf( - "--- Creating mosaic for week %02d/%d (ending %s) ---\n", - week_num, year_num, format(week_end_date, "%Y-%m-%d") - )) - - tryCatch( - { - # Run Script 40 with offset=7 (one week only) for this specific week - # The end_date is the last day of the week, and offset=7 covers the full 7-day week - # Arguments: end_date offset project_dir - cmd <- sprintf( - '"%s" r_app/40_mosaic_creation_per_field.R "%s" 7 "%s"', - RSCRIPT_PATH, - format(week_end_date, "%Y-%m-%d"), project_dir - ) - result <- system(cmd) - - if (result != 0) { - stop("Script 40 exited with error code:", result) - } - - # Verify mosaic was created for this specific week (centralized helper function) - mosaic_check <- check_mosaic_exists(project_dir, week_num, year_num, mosaic_mode) - mosaic_created <- mosaic_check$created - - if (mosaic_created) { - cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num)) - } else { - cat(sprintf("✓ Week %02d/%d processing completed (verify output)\n\n", week_num, year_num)) - } - }, - error = function(e) { - cat(sprintf("✗ Error creating mosaic for week %02d/%d: %s\n", 
week_num, year_num, e$message), "\n") - pipeline_success <<- FALSE - } - ) + tryCatch({ + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/40_mosaic_creation_per_field.R") + main() # Call main() to execute the script with the environment variables + + # Verify mosaic output + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") + if (dir.exists(mosaic_dir)) { + files <- list.files(mosaic_dir, pattern = "\\.tif$") + cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", length(files))) + } else { + cat("✓ Script 40 completed\n") } - - if (pipeline_success) { - cat(sprintf("✓ Script 40 completed - created all %d missing week mosaics\n", nrow(missing_weeks))) - } - } else { - cat("No missing weeks detected - skipping Script 40\n") - skip_40 <- TRUE - } + }, error = function(e) { + cat("✗ Error in Script 40:", e$message, "\n") + pipeline_success <<- FALSE + }) } else if (skip_40) { cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n") } # ============================================================================== -# SCRIPT 80: CALCULATE KPIs (LOOP THROUGH REPORTING WINDOW) +# SCRIPT 80: CALCULATE KPIs # ============================================================================== -if (pipeline_success && !skip_80) { - cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs FOR REPORTING WINDOW ==========\n") - - # Build list of weeks that NEED calculation (missing KPIs) - weeks_to_calculate <- kpis_needed[!kpis_needed$has_kpis, ] # Only weeks WITHOUT KPIs - - if (nrow(weeks_to_calculate) > 0) { - # Sort by date (oldest to newest) for sequential processing - weeks_to_calculate <- weeks_to_calculate[order(weeks_to_calculate$date), ] - - cat(sprintf( - "Looping through %d missing week(s) in reporting window (from %s back to 
%s):\n\n", - nrow(weeks_to_calculate), - format(max(weeks_to_calculate$date), "%Y-%m-%d"), - format(min(weeks_to_calculate$date), "%Y-%m-%d") - )) - - tryCatch( - { - for (week_idx in 1:nrow(weeks_to_calculate)) { - week_row <- weeks_to_calculate[week_idx, ] - calc_date <- week_row$date - - # Run Script 80 for this specific week with offset=7 (one week only) - # This ensures Script 80 calculates KPIs for THIS week with proper trend data - cmd <- sprintf( - '"%s" r_app/80_calculate_kpis.R "%s" "%s" %d', - RSCRIPT_PATH, - format(calc_date, "%Y-%m-%d"), project_dir, 7 - ) # offset=7 for single week - - cat(sprintf( - " [Week %02d/%d] Running Script 80 with end_date=%s...\n", - week_row$week, week_row$year, format(calc_date, "%Y-%m-%d") - )) - - result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE) - - if (result == 0) { - cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year)) - } else { - cat(sprintf( - " ✗ Error calculating KPIs for week %02d/%d (exit code: %d)\n", - week_row$week, week_row$year, result - )) - } - } - - # Verify total KPI output (kpi_dir defined by check_kpi_completeness() earlier) - if (dir.exists(kpi_dir)) { - files <- list.files(kpi_dir, pattern = "\\.xlsx$|\\.rds$") - # Extract subdir name from kpi_dir path for display - subdir_name <- basename(kpi_dir) - cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), subdir_name)) - } else { - cat("\n✓ Script 80 loop completed\n") - } - }, - error = function(e) { - cat("✗ Error in Script 80 loop:", e$message, "\n") - pipeline_success <<- FALSE - } - ) - } else { - cat(sprintf("✓ All %d weeks already have KPIs - skipping calculation\n", nrow(kpis_needed))) - } -} else if (skip_80) { - cat("\n========== SKIPPING SCRIPT 80 (all KPIs already exist) ==========\n") -} - -# ============================================================================== -# VERIFY KPI COMPLETION AFTER SCRIPT 80 -# 
============================================================================== -# Recheck if all KPIs are now available (Script 80 should have calculated any missing ones) -cat("\n========== VERIFYING KPI COMPLETION ==========\n") - -kpis_complete <- TRUE -if (dir.exists(kpi_dir)) { - for (weeks_back in 0:(reporting_weeks_needed - 1)) { - check_date <- end_date - (weeks_back * 7) - week_num <- as.numeric(format(check_date, "%V")) - year_num <- as.numeric(format(check_date, "%G")) - - # Check for any KPI file from that week (flexible pattern to match all formats) - # Matches: week_05_2026, AURA_KPI_week_05_2026, etc. - week_pattern <- sprintf("_week_%02d_%d|week_%02d_%d", week_num, year_num, week_num, year_num) - # NEW: Support per-field architecture - search recursively for KPI files in field subdirectories - kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE) - - if (length(kpi_files_this_week) > 0) { - cat(sprintf(" Week %02d/%d: ✓ KPIs found (%d files)\n", week_num, year_num, length(kpi_files_this_week))) +if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week + cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n") + tryCatch({ + # Set environment variables for the script (Script 80's main() uses these as fallbacks) + # NOTE: end_date is already a Date, just assign directly without as.Date() + assign("end_date", end_date, envir = .GlobalEnv) + assign("end_date_str", end_date_str, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/80_calculate_kpis.R") + main() # Call main() to execute the script with the environment variables + + # Verify KPI output + kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") + if (dir.exists(kpi_dir)) { + files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") + cat(sprintf("✓ Script 80 completed 
- generated %d KPI files\n", length(files))) } else { - kpis_complete <- FALSE - cat(sprintf(" Week %02d/%d: ✗ KPIs not found\n", week_num, year_num)) + cat("✓ Script 80 completed\n") } - } -} - -if (kpis_complete) { - cat("✓ All KPIs available - full reporting window complete\n") -} else { - cat("⚠ Note: Some KPIs may still be missing - Script 80 calculated what was available\n") + }, error = function(e) { + cat("✗ Error in Script 80:", e$message, "\n") + cat("Full error:\n") + print(e) + pipeline_success <<- FALSE + }) } # ============================================================================== -# SCRIPT 90: LEGACY WORD REPORT (agronomic_support clients) +# SCRIPT 90/91: GENERATE WORD REPORTS (CLIENT-TYPE SPECIFIC) # ============================================================================== -if (pipeline_success && run_legacy_report) { - cat("\n========== RUNNING SCRIPT 90: LEGACY WORD REPORT ==========\n") - - tryCatch( - { - # Script 90 is an RMarkdown file - compile it with rmarkdown::render() - output_dir <- paths$reports_dir - - # Reports directory already created by setup_project_directories - - output_filename <- sprintf( - "CI_report_week%02d_%d.docx", - as.numeric(format(end_date, "%V")), - as.numeric(format(end_date, "%G")) +if (pipeline_success) { + # Determine client type from project mapping + source("r_app/parameters_project.R") + source("r_app/00_common_utils.R") + client_type <- get_client_type(project_dir) + + if (client_type == "agronomic_support") { + # SCRIPT 90: Agronomic Support Report (for Aura) + cat("\n========== RUNNING SCRIPT 90: AGRONOMIC SUPPORT REPORT (WORD) ==========\n") + tryCatch({ + # Render the R Markdown file with parameters + # The Rmd file will load parameters_project and utilities internally + rmarkdown::render( + "r_app/90_CI_report_with_kpis_simple.Rmd", + params = list( + data_dir = project_dir, + report_date = end_date, + mail_day = "Monday", + borders = TRUE, + ci_plot_type = "both", + colorblind_friendly = 
FALSE, + facet_by_season = FALSE, + x_axis_unit = "days" + ), + output_file = sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str), + output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"), + quiet = FALSE, + knit_root_dir = getwd() ) - - # Render the RMarkdown document - rmarkdown::render( - input = "r_app/90_CI_report_with_kpis_simple.Rmd", - output_dir = output_dir, - output_file = output_filename, - params = list( - report_date = format(end_date, "%Y-%m-%d"), - data_dir = project_dir - ), - quiet = TRUE - ) - - cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", output_filename)) - }, - error = function(e) { - cat("✗ Error in Script 90:", e$message, "\n") - pipeline_success <<- FALSE + + # Verify report was created + report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", + sprintf("SmartCane_Report_agronomic_%s_%s.docx", project_dir, end_date_str)) + if (file.exists(report_file)) { + cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", basename(report_file))) + } else { + cat("⚠ Script 90 report file not found - check rendering\n") } - ) -} else if (run_legacy_report) { - cat("\n========== SKIPPING SCRIPT 90 (pipeline error) ==========\n") -} - -# ============================================================================== -# SCRIPT 91: MODERN WORD REPORT (cane_supply clients) -# ============================================================================== -if (pipeline_success && run_modern_report) { - cat("\n========== RUNNING SCRIPT 91: MODERN WORD REPORT ==========\n") - - tryCatch( - { - # Script 91 is an RMarkdown file - compile it with rmarkdown::render() - output_dir <- paths$reports_dir - - # Reports directory already created by setup_project_directories - - output_filename <- sprintf( - "CI_report_week%02d_%d.docx", - as.numeric(format(end_date, "%V")), - as.numeric(format(end_date, "%G")) - ) - - # Render the RMarkdown document - 
rmarkdown::render( - input = "r_app/91_CI_report_with_kpis_Angata.Rmd", - output_dir = output_dir, - output_file = output_filename, - params = list( - report_date = format(end_date, "%Y-%m-%d"), - data_dir = project_dir - ), - quiet = TRUE - ) - - cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", output_filename)) - }, - error = function(e) { - cat("✗ Error in Script 91:", e$message, "\n") - pipeline_success <<- FALSE + }, error = function(e) { + cat("✗ Error in Script 90:", e$message, "\n") + print(e) + pipeline_success <<- FALSE + }) + } else if (client_type == "cane_supply") { + # SCRIPT 91: Cane Supply Report (for Angata, Chemba, Xinavane, ESA) + cat("\n========== RUNNING SCRIPT 91: CANE SUPPLY REPORT (WORD) ==========\n") + tryCatch({ + # Render the R Markdown file with parameters + # The Rmd file will load parameters_project and utilities internally + rmarkdown::render( + "r_app/91_CI_report_with_kpis_Angata.Rmd", + params = list( + data_dir = project_dir, + report_date = end_date, + mail_day = "Monday", + borders = TRUE, + ci_plot_type = "both", + colorblind_friendly = FALSE, + facet_by_season = FALSE, + x_axis_unit = "days" + ), + output_file = sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str), + output_dir = file.path("laravel_app", "storage", "app", project_dir, "reports"), + quiet = FALSE, + knit_root_dir = getwd() + ) + + # Verify report was created + report_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", + sprintf("SmartCane_Report_cane_supply_%s_%s.docx", project_dir, end_date_str)) + if (file.exists(report_file)) { + cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", basename(report_file))) + } else { + cat("⚠ Script 91 report file not found - check rendering\n") } - ) -} else if (run_modern_report) { - cat("\n========== SKIPPING SCRIPT 91 (pipeline error) ==========\n") + }, error = function(e) { + cat("✗ Error in Script 91:", e$message, "\n") + print(e) + 
pipeline_success <<- FALSE + }) + } } # ============================================================================== @@ -935,4 +638,4 @@ if (pipeline_success) { } else { cat("Status: ✗ Pipeline failed - check errors above\n") } -cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80 → R 90/91\n") +cat("Pipeline sequence: Python Download → R 10 → R 20 → R 30 → R 21 → Python 31 → R 40 → R 80 → R 90/91\n")