464 lines
19 KiB
R
464 lines
19 KiB
R
# ==============================================================================
|
||
# SMARTCANE MANUAL PIPELINE RUNNER
|
||
# ==============================================================================
|
||
#
|
||
# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS.
|
||
# Do NOT run this script directly - instead, copy individual commands and
|
||
# paste them into your PowerShell terminal.
|
||
#
|
||
# This approach allows you to:
|
||
# - Run steps one at a time and inspect outputs
|
||
# - Re-run failed steps without re-running successful ones
|
||
# - Monitor progress between steps
|
||
# - Troubleshoot issues more easily than with automated pipeline
|
||
#
|
||
# ==============================================================================
|
||
# PIPELINE SEQUENCE (IN ORDER)
|
||
# ==============================================================================
|
||
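#
# Note: the items below are numbered 1-9, while the matching sections under
# COMMAND REFERENCE are labelled STEP 0-8 (item N here = STEP N-1 below).
#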
|
||
# 1. Python: Download Planet satellite imagery (optional - only if new data needed)
|
||
# 2. R10: Split farm TIFFs into per-field directory structure
|
||
# 3. R20: Extract Chlorophyll Index (CI) from 4-band imagery
|
||
# 4. R30: Interpolate growth model (smooth CI time series)
|
||
# 5. R21: Convert CI data to CSV format for Python
|
||
# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx)
|
||
# 7. R40: Create weekly mosaic TIFFs
|
||
# 8. R80: Calculate KPIs (field uniformity, trends, stress)
|
||
# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply)
|
||
#
|
||
# ==============================================================================
|
||
# BEFORE YOU START
|
||
# ==============================================================================
|
||
#
|
||
# 1. Open PowerShell in the smartcane root directory:
|
||
# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\
|
||
#
|
||
# 2. Define your parameters ONCE at the top of the session:
|
||
#
|
||
# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba
|
||
# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04)
|
||
# $OFFSET = 7 # Days to look back (e.g., 7 for one week)
|
||
# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE
|
||
# $YEAR = 2026 # ISO year - auto-calculated from END_DATE
|
||
#
|
||
# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc.
|
||
#
|
||
# ==============================================================================
|
||
# COMMAND REFERENCE
|
||
# ==============================================================================
|
||
|
||
# ==============================================================================
|
||
# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL)
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API
|
||
# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif
|
||
#
|
||
# WHEN TO RUN:
|
||
# - Only needed if you have new dates to process
|
||
# - Pipeline skips dates already in merged_tif/ or field_tiles/
|
||
# - First-time setup: download for your date range
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||
# DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||
# RESOLUTION: 3 meters (default) - can also use 5, 10
|
||
# --cleanup: Delete intermediate files after download
|
||
# --clear-all: Clear all output folders before downloading
|
||
#
|
||
# COMMAND #1 - Single Date Download:
|
||
#
|
||
# cd python_app
|
||
# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup
|
||
#
|
||
# Example:
|
||
# cd python_app
|
||
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
|
||
#
|
||
# COMMAND #2 - Batch Download (Multiple Dates):
|
||
# For date ranges, MUST use download_planet_missing_dates.py (not Script 00)
|
||
#
|
||
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
|
||
#
|
||
# Example:
|
||
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
|
||
#
|
||
# IMPORTANT DISTINCTION:
|
||
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
|
||
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
|
||
# Script 00 does NOT have --start/--end flags, despite what other documentation suggests
|
||
# Use the correct script for your use case!
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
|
||
#
|
||
# Note: Planet API requires authentication (PLANET_API_KEY environment variable)
|
||
# Cost: ~1,500-2,000 PU per date
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 1: R10 - Create Per-Field TIFF Structure
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Split farm-wide GeoTIFFs into per-field directory structure.
|
||
# Transforms: merged_tif/{DATE}.tif (single file)
|
||
# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files)
|
||
# This enables clean, scalable processing in downstream scripts.
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR)
|
||
# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif
|
||
# - One TIFF per field per date (1185 fields × N dates in Angata)
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
|
||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
|
||
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
|
||
#
|
||
# COMMAND #1 - Default (All dates, current date, 7-day window):
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||
#
|
||
#
|
||
# COMMAND #2 - Specific Date Range:
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
|
||
#
|
||
# Example (one week back from 2026-02-09):
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||
#
|
||
# Example (two weeks back from 2026-02-09):
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
|
||
# Storage location: laravel_app/storage/app/angata/field_tiles/
|
||
# Script execution time: 5-10 minutes (depends on number of dates)
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 2: R20 - Extract Chlorophyll Index (CI)
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs.
|
||
# Also extracts CI statistics per sub_field for daily tracking.
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band)
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI)
|
||
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds
|
||
#
|
||
# EXPECTED BEHAVIOR:
|
||
# If files are missing from field_tiles_CI/ or daily_vals/, Script 20 will process the missing ones
|
||
# Script 20 skips files that already exist (to avoid re-processing)
|
||
# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end
|
||
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||
#
|
||
# COMMAND:
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
|
||
#
|
||
# Example:
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
|
||
# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/
|
||
# Storage location: laravel_app/storage/app/angata/field_tiles_CI/
|
||
# Script execution time: 10-20 minutes (depends on number of dates+fields)
|
||
#
|
||
# NOTES:
|
||
# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE
|
||
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
|
||
# To process all existing dates in field_tiles/: Use large OFFSET (e.g., 365)
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 3: R30 - Interpolate Growth Model
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Smooth CI time series using LOESS interpolation to fill gaps.
|
||
# Creates continuous growth curves for each field across all measurement dates.
|
||
# Enables trend analysis, yield prediction, and cumulative growth metrics.
|
||
#
|
||
# INPUT:
|
||
# - Daily CI statistics from Script 20 (per-field RDS files in Data/extracted_ci/daily_vals/{FIELD_ID}/)
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||
# - (This is the growth model output used by Script 21 and 80)
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||
#
|
||
# COMMAND:
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT]
|
||
#
|
||
# Example:
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||
# Contains: Interpolated CI data for all fields (wide format)
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 4: R21 - Convert CI RDS to CSV (Python Format)
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Convert growth model output from R's RDS format to Python-compatible CSV.
|
||
# Transforms from wide format (fields × dates) to long format (one row per field-date pair).
|
||
# Prepares data for Python harvest detection models.
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||
# (Output from Script 30)
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||
#
|
||
# COMMAND:
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT]
|
||
#
|
||
# Example:
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# File: ci_data_for_python.csv (~5-10 MB)
|
||
# Rows: #fields × #dates (e.g., 1185 × 100 = 118,500 rows)
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL)
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Predict which fields are approaching harvest in the next 28 days.
|
||
# Uses neural network (Model 307) trained on historical harvest dates.
|
||
# Generates weekly probability scores for operational harvest scheduling.
|
||
#
|
||
# REQUIRES:
|
||
# - harvest.xlsx with field planting/harvest dates
|
||
# - ci_data_for_python.csv from Script 21
|
||
# - PyTorch environment (conda pytorch_gpu)
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx
|
||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||
# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||
#
|
||
# COMMAND:
|
||
#
|
||
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT]
|
||
#
|
||
# Example:
|
||
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||
# Rows: One per field (e.g., 1185 rows for Angata)
|
||
#
|
||
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 6: R40 - Create Weekly Mosaic TIFFs
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Aggregate daily per-field CI TIFFs into weekly mosaics.
|
||
# Handles multiple dates (full week) with maximum CI value per pixel.
|
||
# Creates 5-band output for reporting and KPI calculations.
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif
|
||
# (Daily per-field CI TIFFs from Script 20)
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif
|
||
# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files)
|
||
#
|
||
# PARAMETERS:
|
||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week
|
||
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||
#
|
||
# COMMAND:
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT]
|
||
#
|
||
# Example (one week window):
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# Location: laravel_app/storage/app/angata/weekly_mosaic/
|
||
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
|
||
# Files created: #fields (e.g., 1185 for Angata)
|
||
# Storage: ~50-100 MB total for all mosaic TIFFs
|
||
# Script execution time: 5-10 minutes
|
||
#
|
||
# NOTE: Files are named with ISO week number (WW) and year (YYYY)
|
||
# Week calculation is automatic based on END_DATE
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 7: R80 - Calculate Key Performance Indicators (KPIs)
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Calculate per-field metrics from weekly mosaic TIFFs:
|
||
# - Field uniformity (CV - Coefficient of Variation)
|
||
# - Growth trends (4-week and 8-week)
|
||
# - Area change detection
|
||
# - TCH forecast
|
||
# - Spatial clustering (weed/stress detection)
|
||
# - Generates Excel export for dashboards and reporting
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif
|
||
# - Field boundaries (pivot.geojson)
|
||
# - Harvest data (harvest.xlsx)
|
||
# - Historical stats cache (RDS from previous weeks)
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats)
|
||
# - 21 columns with field-level KPIs and alerts
|
||
#
|
||
# PARAMETERS:
|
||
# END_DATE: Report date in YYYY-MM-DD format (default: today)
|
||
# PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
|
||
# OFFSET: Days to look back for historical comparison (default: 7, for backward compatibility)
|
||
#
|
||
# COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||
#
|
||
# Example:
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||
#
|
||
# COMMAND #2 - Specific Date & Project:
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
|
||
#
|
||
# Example (2026-02-09, angata, 7-day lookback):
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
|
||
#
|
||
# EXPECTED OUTPUT:
|
||
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||
# Rows: One per field (e.g., 1185 for Angata)
|
||
# Columns: 21 KPI columns (uniformity, trend, alerts, etc.)
|
||
# Location: laravel_app/storage/app/angata/output/
|
||
# Script execution time: 10-20 minutes
|
||
#
|
||
# EXPECTED COLUMNS:
|
||
# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac,
|
||
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
|
||
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
|
||
#
|
||
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
|
||
# R40 order: [END_DATE] [OFFSET] [PROJECT]
|
||
# R80 order: [END_DATE] [PROJECT] [OFFSET]
|
||
# These are NOT the same! Ensure correct order for each script.
# (R10 and R20 use a third order: [PROJECT] [END_DATE] [OFFSET].)
|
||
#
|
||
# ============================================================================
|
||
|
||
|
||
# ==============================================================================
|
||
# STEP 8: R90/R91 - Generate Word Report (OPTIONAL)
|
||
# ==============================================================================
|
||
#
|
||
# PURPOSE:
|
||
# Generate formatted Word report (.docx) with:
|
||
# - KPI summary tables and charts
|
||
# - Per-field performance metrics
|
||
# - Alerts and recommendations
|
||
# - Interpretation guides
|
||
#
|
||
# Client-Specific Reports:
|
||
# - R90: Agronomic Support (for AURA project)
|
||
# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA)
|
||
#
|
||
# INPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||
# (from Script 80)
|
||
#
|
||
# OUTPUT:
|
||
# - laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*.docx
|
||
# - Formatted Word document (~5-10 MB)
|
||
#
|
||
# PARAMETERS:
|
||
# PROJECT: aura, angata, chemba, xinavane, esa, simba (passed to render() as params data_dir)
|
||
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) (passed to render() as params report_date)
|
||
# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render)
|
||
#
|
||
# COMMAND #1 - AGRONOMIC REPORT (AURA project):
|
||
# From R console or R script:
|
||
#
|
||
# Render the agronomic-support report (Rmd 90) for the "aura" data directory,
# once per language. The two calls are identical except for `language` and the
# matching language suffix in `output_file`; keep them in sync when editing.
#
#   params      - values handed to the Rmd as its `params` list
#                 (data_dir, report_date, language)
#   output_file - file name of the rendered .docx
#   output_dir  - folder the .docx is written to
#
# All paths are relative, so run this with the working directory set to the
# smartcane root. Unlike the other commands in this file, these calls are not
# commented out, so they execute if the file is run as an R script.

# language = "en"
rmarkdown::render(
  "r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
  params = list(data_dir = "aura", report_date = as.Date("2026-02-18"), language = "en"),
  output_file = "SmartCane_Report_agronomic_support_aura_2026-02-18_en_test.docx",
  output_dir = "laravel_app/storage/app/aura/reports"
)

# language = "es-mx"
rmarkdown::render(
  "r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
  params = list(data_dir = "aura", report_date = as.Date("2026-02-18"), language = "es-mx"),
  output_file = "SmartCane_Report_agronomic_support_aura_2026-02-18_es-mx_test.docx",
  output_dir = "laravel_app/storage/app/aura/reports"
)
|
||
#
|
||
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
|
||
# From R console or R script:
|
||
#
|
||
# Render the cane-supply report (Rmd 91) for the "angata" data directory.
#
#   params      - values handed to the Rmd as its `params` list
#                 (data_dir, report_date); no `language` value is passed here
#   output_file - file name of the rendered .docx
#   output_dir  - folder the .docx is written to
#
# When changing project or date, update data_dir / report_date AND the
# matching parts of output_file and output_dir, which are hard-coded strings.
# All paths are relative, so run this with the working directory set to the
# smartcane root. This call is not commented out, so it executes if the file
# is run as an R script.
rmarkdown::render(
  "r_app/91_CI_report_with_kpis_cane_supply.Rmd",
  params = list(data_dir = "angata", report_date = as.Date("2026-02-23")),
  output_file = "SmartCane_Report_cane_supply_angata_2026-02-23_en.docx",
  output_dir = "laravel_app/storage/app/angata/reports"
)
|
||
#
|