shaving off some more details...
This commit is contained in:
parent
e16677eb78
commit
3ee3f9e31c
613
MANUAL_PIPELINE_RUNNER.R
Normal file
613
MANUAL_PIPELINE_RUNNER.R
Normal file
|
|
@ -0,0 +1,613 @@
|
||||||
|
# ==============================================================================
|
||||||
|
# SMARTCANE MANUAL PIPELINE RUNNER
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS.
|
||||||
|
# Do NOT run this script directly - instead, copy individual commands and
|
||||||
|
# paste them into your PowerShell terminal.
|
||||||
|
#
|
||||||
|
# This approach allows you to:
|
||||||
|
# - Run steps one at a time and inspect outputs
|
||||||
|
# - Re-run failed steps without re-running successful ones
|
||||||
|
# - Monitor progress between steps
|
||||||
|
# - Troubleshoot issues more easily than with automated pipeline
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
# PIPELINE SEQUENCE (IN ORDER)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# 1. Python: Download Planet satellite imagery (optional - only if new data needed)
|
||||||
|
# 2. R10: Split farm TIFFs into per-field directory structure
|
||||||
|
# 3. R20: Extract Chlorophyll Index (CI) from 4-band imagery
|
||||||
|
# 4. R30: Interpolate growth model (smooth CI time series)
|
||||||
|
# 5. R21: Convert CI data to CSV format for Python
|
||||||
|
# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx)
|
||||||
|
# 7. R40: Create weekly mosaic TIFFs
|
||||||
|
# 8. R80: Calculate KPIs (field uniformity, trends, stress)
|
||||||
|
# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply)
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
# BEFORE YOU START
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# 1. Open PowerShell in the smartcane root directory:
|
||||||
|
# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\
|
||||||
|
#
|
||||||
|
# 2. Define your parameters ONCE at the top of the session:
|
||||||
|
#
|
||||||
|
# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba
|
||||||
|
# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04)
|
||||||
|
# $OFFSET = 7 # Days to look back (e.g., 7 for one week)
|
||||||
|
# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE
|
||||||
|
# $YEAR = 2026 # ISO year - auto-calculated from END_DATE
|
||||||
|
#
|
||||||
|
# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc.
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
# COMMAND REFERENCE
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API
|
||||||
|
# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif
|
||||||
|
#
|
||||||
|
# WHEN TO RUN:
|
||||||
|
# - Only needed if you have new dates to process
|
||||||
|
# - Pipeline skips dates already in merged_tif/ or field_tiles/
|
||||||
|
# - First-time setup: download for your date range
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||||||
|
# RESOLUTION: 3 meters (default) - can also use 5, 10
|
||||||
|
# --cleanup: Delete intermediate files after download
|
||||||
|
# --clear-all: Clear all output folders before downloading
|
||||||
|
#
|
||||||
|
# COMMAND #1 - Single Date Download:
|
||||||
|
#
|
||||||
|
# cd python_app
|
||||||
|
# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
|
||||||
|
#
|
||||||
|
# COMMAND #2 - Batch Download (Multiple Dates):
|
||||||
|
#
|
||||||
|
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
|
||||||
|
#
|
||||||
|
# Note: Planet API requires authentication (PLANET_API_KEY environment variable)
|
||||||
|
# Cost: ~1,500-2,000 PU per date
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 1: R10 - Create Per-Field TIFF Structure
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Split farm-wide GeoTIFFs into per-field directory structure.
|
||||||
|
# Transforms: merged_tif/{DATE}.tif (single file)
|
||||||
|
# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files)
|
||||||
|
# This enables clean, scalable processing in downstream scripts.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR)
|
||||||
|
# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif
|
||||||
|
# - One TIFF per field per date (1185 fields × N dates in Angata)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
|
||||||
|
# Storage location: laravel_app/storage/app/angata/field_tiles/
|
||||||
|
# Script execution time: 5-10 minutes (depends on number of dates)
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 2: R20 - Extract Chlorophyll Index (CI)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs.
|
||||||
|
# Also extracts CI statistics per sub_field for daily tracking.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI)
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds
|
||||||
|
#
|
||||||
|
# EXPECTED BEHAVIOR:
|
||||||
|
# If field_tiles_CI/ or daily_vals/ is missing files, Script 20 will process them
|
||||||
|
# Script 20 skips files that already exist (to avoid re-processing)
|
||||||
|
# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end
|
||||||
|
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
|
||||||
|
# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/
|
||||||
|
# Storage location: laravel_app/storage/app/angata/field_tiles_CI/
|
||||||
|
# Script execution time: 10-20 minutes (depends on number of dates+fields)
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE
|
||||||
|
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
|
||||||
|
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
|
||||||
|
#
|
||||||
|
# TROUBLESHOOTING:
|
||||||
|
# ❌ If field_tiles_CI has fewer files than field_tiles:
|
||||||
|
# - Check if all field_tiles/{FIELD}/{DATE}.tif files exist
|
||||||
|
# - Script 20 may be skipping due to incomplete source files
|
||||||
|
# - Solution: Delete problematic files from field_tiles and re-run Script 10
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 3: R30 - Interpolate Growth Model
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Smooth CI time series using LOESS interpolation to fill gaps.
|
||||||
|
# Creates continuous growth curves for each field across all measurement dates.
|
||||||
|
# Enables trend analysis, yield prediction, and cumulative growth metrics.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - Daily CI statistics from Script 20 (Data/extracted_ci/daily_vals/ per-field RDS files)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# - (This is the growth model output used by Script 21 and 80)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# Contains: Interpolated CI data for all fields (wide format)
|
||||||
|
# Script execution time: 5-15 minutes
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 4: R21 - Convert CI RDS to CSV (Python Format)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Convert growth model output from R's RDS format to Python-compatible CSV.
|
||||||
|
# Transforms from wide format (fields × dates) to long format (one row per field-date pair).
|
||||||
|
# Prepares data for Python harvest detection models.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
# (Output from Script 30)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
|
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: ci_data_for_python.csv (~5-10 MB)
|
||||||
|
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
|
||||||
|
# Script execution time: 1-2 minutes
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Predict which fields are approaching harvest in the next 28 days.
|
||||||
|
# Uses neural network (Model 307) trained on historical harvest dates.
|
||||||
|
# Generates weekly probability scores for operational harvest scheduling.
|
||||||
|
#
|
||||||
|
# REQUIRES:
|
||||||
|
# - harvest.xlsx with field planting/harvest dates
|
||||||
|
# - ci_data_for_python.csv from Script 21
|
||||||
|
# - PyTorch environment (conda pytorch_gpu)
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
|
# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
|
# Rows: One per field (e.g., 1185 rows for Angata)
|
||||||
|
# Script execution time: 2-5 minutes
|
||||||
|
#
|
||||||
|
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 6: R40 - Create Weekly Mosaic TIFFs
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Aggregate daily per-field CI TIFFs into weekly mosaics.
|
||||||
|
# Handles multiple dates (full week) with maximum CI value per pixel.
|
||||||
|
# Creates 5-band output for reporting and KPI calculations.
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif
|
||||||
|
# (Daily per-field CI TIFFs from Script 20)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif
|
||||||
|
# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week
|
||||||
|
# OFFSET: Days to look back (e.g., 7 for one week window)
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
#
|
||||||
|
# COMMAND:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT]
|
||||||
|
#
|
||||||
|
# Example (one week window):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||||
|
#
|
||||||
|
# Example (two week window):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 14 angata
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# Location: laravel_app/storage/app/angata/weekly_mosaic/
|
||||||
|
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
|
||||||
|
# Files created: #fields (e.g., 1185 for Angata)
|
||||||
|
# Storage: ~50-100 MB total for all mosaic TIFFs
|
||||||
|
# Script execution time: 5-10 minutes
|
||||||
|
#
|
||||||
|
# NOTE: Files are named with ISO week number (WW) and year (YYYY)
|
||||||
|
# Week calculation is automatic based on END_DATE
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 7: R80 - Calculate Key Performance Indicators (KPIs)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Calculate per-field metrics from weekly mosaic TIFFs:
|
||||||
|
# - Field uniformity (CV - Coefficient of Variation)
|
||||||
|
# - Growth trends (4-week and 8-week)
|
||||||
|
# - Area change detection
|
||||||
|
# - TCH forecast
|
||||||
|
# - Spatial clustering (weed/stress detection)
|
||||||
|
# - Generates Excel export for dashboards and reporting
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif
|
||||||
|
# - Field boundaries (pivot.geojson)
|
||||||
|
# - Harvest data (harvest.xlsx)
|
||||||
|
# - Historical stats cache (RDS from previous weeks)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats)
|
||||||
|
# - 21 columns with field-level KPIs and alerts
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# WEEK: ISO week number (1-53, optional - default current week)
|
||||||
|
# YEAR: ISO year (optional - default current year)
|
||||||
|
#
|
||||||
|
# COMMAND #1 - Current Week (Auto-detects from TODAY):
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT]
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata
|
||||||
|
#
|
||||||
|
# COMMAND #2 - Specific Week & Year:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] [WEEK] [YEAR]
|
||||||
|
#
|
||||||
|
# Example (Week 5, Year 2026):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# Rows: One per field (e.g., 1185 for Angata)
|
||||||
|
# Columns: 21 KPI columns (uniformity, trend, alerts, etc.)
|
||||||
|
# Location: laravel_app/storage/app/angata/output/
|
||||||
|
# Script execution time: 10-20 minutes
|
||||||
|
#
|
||||||
|
# EXPECTED COLUMNS:
|
||||||
|
# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac,
|
||||||
|
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
|
||||||
|
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# STEP 8: R90/R91 - Generate Word Report (OPTIONAL)
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# PURPOSE:
|
||||||
|
# Generate formatted Word report (.docx) with:
|
||||||
|
# - KPI summary tables and charts
|
||||||
|
# - Per-field performance metrics
|
||||||
|
# - Alerts and recommendations
|
||||||
|
# - Interpretation guides
|
||||||
|
#
|
||||||
|
# Client-Specific Reports:
|
||||||
|
# - R90: Agronomic Support (for AURA project)
|
||||||
|
# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA)
|
||||||
|
#
|
||||||
|
# INPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# (from Script 80)
|
||||||
|
#
|
||||||
|
# OUTPUT:
|
||||||
|
# - laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*.docx
|
||||||
|
# - Formatted Word document (~5-10 MB)
|
||||||
|
#
|
||||||
|
# PARAMETERS:
|
||||||
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04)
|
||||||
|
# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render)
|
||||||
|
#
|
||||||
|
# COMMAND #1 - AGRONOMIC REPORT (AURA project):
|
||||||
|
# From R console or R script:
|
||||||
|
#
|
||||||
|
# rmarkdown::render(
|
||||||
|
# "r_app/90_CI_report_with_kpis_simple.Rmd",
|
||||||
|
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||||
|
# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
|
||||||
|
# output_dir = "laravel_app/storage/app/angata/reports"
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
|
||||||
|
# From R console or R script:
|
||||||
|
#
|
||||||
|
# rmarkdown::render(
|
||||||
|
# "r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||||
|
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||||
|
# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||||
|
# output_dir = "laravel_app/storage/app/angata/reports"
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# EXPECTED OUTPUT:
|
||||||
|
# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
|
||||||
|
# Location: laravel_app/storage/app/{PROJECT}/reports/
|
||||||
|
# Script execution time: 5-10 minutes
|
||||||
|
#
|
||||||
|
# NOTE:
|
||||||
|
# These are R Markdown files and cannot be run directly via Rscript
|
||||||
|
# Use rmarkdown::render() from an R interactive session or wrapper script
|
||||||
|
# See run_full_pipeline.R for an automated example
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# QUICK REFERENCE: Common Workflows
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# WORKFLOW A: Weekly Update (Most Common)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# Goal: Process latest week of data through full pipeline
|
||||||
|
#
|
||||||
|
# Parameters:
|
||||||
|
# $PROJECT = "angata"
|
||||||
|
# $END_DATE = "2026-02-04" # Today or latest date available
|
||||||
|
# $OFFSET = 7 # One week back
|
||||||
|
#
|
||||||
|
# Steps:
|
||||||
|
# 1. SKIP Python download (if you already have data)
|
||||||
|
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||||
|
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
||||||
|
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||||
|
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||||
|
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||||
|
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata
|
||||||
|
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
|
||||||
|
# OR from R console:
|
||||||
|
# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd",
|
||||||
|
# params=list(data_dir="angata", report_date=as.Date("2026-02-04")),
|
||||||
|
# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||||
|
# output_dir="laravel_app/storage/app/angata/reports")
|
||||||
|
#
|
||||||
|
# Execution time: ~60-90 minutes total
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# WORKFLOW B: Initial Setup (Large Backfill)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# Goal: Process multiple weeks of historical data
|
||||||
|
#
|
||||||
|
# Steps:
|
||||||
|
# 1. Python download (your entire date range)
|
||||||
|
# 2. Run R10 once (processes all dates)
|
||||||
|
# 3. Run R20 with large offset to process all historical dates:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
|
||||||
|
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||||
|
# 4. Run R30 once (growth model full season)
|
||||||
|
# 5. Run R21 once (CSV export)
|
||||||
|
# 6. Run R40 with specific week windows as needed
|
||||||
|
# 7. Run R80 for each week you want KPIs for
|
||||||
|
|
||||||
|
# Steps 6-7 in detail - for each week, run:
|
||||||
|
# - R40 with different END_DATE values (one per week)
|
||||||
|
# - R80 with different WEEK/YEAR values (one per week)
|
||||||
|
# - R91 optional (one per week report)
|
||||||
|
#
|
||||||
|
# Pro tip: Script R40 with offset=14 covers two weeks at once
|
||||||
|
# Then R40 again with offset=7 for just one week
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# WORKFLOW C: Troubleshooting (Check Intermediate Outputs)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# Goal: Verify outputs before moving to next step
|
||||||
|
#
|
||||||
|
# After R10: Check field_tiles/{FIELD_ID}/ has #dates files
|
||||||
|
# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files
|
||||||
|
# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds
|
||||||
|
# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week
|
||||||
|
# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx
|
||||||
|
#
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# TROUBLESHOOTING
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# ISSUE: R20 not processing all field_tiles files
|
||||||
|
# ────────────────────────────────────────────────
|
||||||
|
# Symptom: field_tiles has 496 files, field_tiles_CI only has 5
|
||||||
|
#
|
||||||
|
# Possible causes:
|
||||||
|
# 1. Source files incomplete or corrupted
|
||||||
|
# 2. Script 20 skips because CI TIFF already exists (even if incomplete)
|
||||||
|
# 3. Partial run from previous attempt
|
||||||
|
#
|
||||||
|
# Solutions:
|
||||||
|
# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!)
|
||||||
|
# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/*
|
||||||
|
# 2. Re-run Script 20
|
||||||
|
# 3. If still fails, delete field_tiles_CI completely and re-run Script 20
|
||||||
|
# rm -r laravel_app/storage/app/angata/field_tiles_CI/
|
||||||
|
#
|
||||||
|
# ISSUE: Script 80 says "No per-field mosaic files found"
|
||||||
|
# ────────────────────────────────────────────────────────
|
||||||
|
# Symptom: R80 fails to calculate KPIs
|
||||||
|
#
|
||||||
|
# Possible causes:
|
||||||
|
# 1. Script 40 hasn't run yet (weekly_mosaic doesn't exist)
|
||||||
|
# 2. Wrong END_DATE or WEEK/YEAR combination
|
||||||
|
# 3. weekly_mosaic/{FIELD}/ directory missing (old format?)
|
||||||
|
#
|
||||||
|
# Solutions:
|
||||||
|
# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif
|
||||||
|
# 2. Verify END_DATE is within date range of available CI data
|
||||||
|
# 3. For current week: End date must be THIS week (same ISO week as today)
|
||||||
|
#
|
||||||
|
# ISSUE: Python download fails ("Not authorized")
|
||||||
|
# ────────────────────────────────────────────────
|
||||||
|
# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error
|
||||||
|
#
|
||||||
|
# Cause: PLANET_API_KEY environment variable not set
|
||||||
|
#
|
||||||
|
# Solution:
|
||||||
|
# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here"
|
||||||
|
# 2. Verify: $env:PLANET_API_KEY (should show your key)
|
||||||
|
# 3. Try download again
|
||||||
|
#
|
||||||
|
# ISSUE: R30 takes too long
|
||||||
|
# ──────────────────────────
|
||||||
|
# Symptom: Script 30 running for >30 minutes
|
||||||
|
#
|
||||||
|
# Cause: LOESS interpolation is slow with many dates/fields
|
||||||
|
#
|
||||||
|
# Solution:
|
||||||
|
# 1. This is normal - large date ranges slow down interpolation
|
||||||
|
# 2. Subsequent runs are faster (cached results)
|
||||||
|
# 3. If needed: reduce offset or run fewer weeks at a time
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# SUMMARY OF FILES CREATED BY EACH SCRIPT
|
||||||
|
# ==============================================================================
|
||||||
|
#
|
||||||
|
# Script 10 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif
|
||||||
|
#
|
||||||
|
# Script 20 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif
|
||||||
|
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds
|
||||||
|
#
|
||||||
|
# Script 30 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
|
#
|
||||||
|
# Script 21 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
|
#
|
||||||
|
# Python 31 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
|
#
|
||||||
|
# Script 40 creates:
|
||||||
|
#   laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/week_{WW}_{YYYY}.tif
|
||||||
|
#
|
||||||
|
# Script 80 creates:
|
||||||
|
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds
|
||||||
|
#
|
||||||
|
# Script 90/91 creates:
|
||||||
|
#   laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*_{PROJECT}_{DATE}.docx
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
|
@ -1,499 +0,0 @@
|
||||||
#' Combined: Create master grid and split TIFFs into tiles
|
|
||||||
#' ====================================================================
|
|
||||||
#'
|
|
||||||
#' Purpose:
|
|
||||||
#' 1. Check all daily TIFFs for matching extents
|
|
||||||
#' 2. Create master 5×5 grid covering all TIFFs
|
|
||||||
#' 3. Split each daily TIFF into 25 tiles using the master grid
|
|
||||||
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
|
|
||||||
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
|
|
||||||
|
|
||||||
|
|
||||||
library(terra)
|
|
||||||
library(sf)
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CONFIGURATION & COMMAND-LINE ARGUMENTS
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Parse command-line arguments for date filtering
|
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
|
||||||
|
|
||||||
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
|
|
||||||
start_date <- NULL
|
|
||||||
end_date <- NULL
|
|
||||||
|
|
||||||
if (length(args) >= 1) {
|
|
||||||
start_date <- as.Date(args[1])
|
|
||||||
cat("Filtering: start date =", as.character(start_date), "\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(args) >= 2) {
|
|
||||||
end_date <- as.Date(args[2])
|
|
||||||
cat("Filtering: end date =", as.character(end_date), "\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
PROJECT <- "angata"
|
|
||||||
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
|
|
||||||
|
|
||||||
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
|
|
||||||
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
|
|
||||||
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
|
|
||||||
GRID_NROWS <- 5
|
|
||||||
GRID_NCOLS <- 5
|
|
||||||
|
|
||||||
# Construct grid-specific subfolder path
|
|
||||||
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
|
|
||||||
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
|
|
||||||
|
|
||||||
# Load field boundaries for overlap checking
|
|
||||||
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
|
|
||||||
|
|
||||||
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
|
|
||||||
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
cat("\n[PART 1] Creating Master Grid\n")
|
|
||||||
|
|
||||||
# Load field boundaries for overlap checking
|
|
||||||
cat("\n[1] Checking for existing master grid...\n")
|
|
||||||
|
|
||||||
# Check if master grid already exists
|
|
||||||
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
|
|
||||||
|
|
||||||
if (file.exists(MASTER_GRID_PATH)) {
|
|
||||||
cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
|
|
||||||
master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
|
|
||||||
field_boundaries_sf <- NULL # No need to load pivot.geojson
|
|
||||||
field_boundaries_vect <- NULL
|
|
||||||
|
|
||||||
cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# No existing grid - need to create one from pivot.geojson
|
|
||||||
cat(" No existing grid found. Creating new one from pivot.geojson...\n")
|
|
||||||
|
|
||||||
if (!file.exists(GEOJSON_PATH)) {
|
|
||||||
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
|
|
||||||
"Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
|
|
||||||
"from the same directory as a previous successful run (grid already exists).")
|
|
||||||
}
|
|
||||||
|
|
||||||
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
|
|
||||||
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
|
|
||||||
|
|
||||||
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Try to find a name column (only if field_boundaries_sf exists)
|
|
||||||
if (!is.null(field_boundaries_sf)) {
|
|
||||||
field_names <- NA
|
|
||||||
if ("name" %in% names(field_boundaries_sf)) {
|
|
||||||
field_names <- field_boundaries_sf$name
|
|
||||||
} else if ("field" %in% names(field_boundaries_sf)) {
|
|
||||||
field_names <- field_boundaries_sf$field
|
|
||||||
} else if ("field_name" %in% names(field_boundaries_sf)) {
|
|
||||||
field_names <- field_boundaries_sf$field_name
|
|
||||||
} else {
|
|
||||||
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Helper function: Check if a tile overlaps with any field (simple bbox overlap)
|
|
||||||
tile_overlaps_fields <- function(tile_extent, field_geoms) {
|
|
||||||
tryCatch({
|
|
||||||
# Simple bounding box overlap test - no complex geometry operations
|
|
||||||
# Two boxes overlap if: NOT (box1.xmax < box2.xmin OR box1.xmin > box2.xmax OR
|
|
||||||
# box1.ymax < box2.ymin OR box1.ymin > box2.ymax)
|
|
||||||
|
|
||||||
# For each field geometry, check if it overlaps with tile bbox
|
|
||||||
for (i in seq_len(length(field_geoms))) {
|
|
||||||
# Skip empty geometries
|
|
||||||
if (st_is_empty(field_geoms[i])) {
|
|
||||||
next
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get field bbox
|
|
||||||
field_bbox <- st_bbox(field_geoms[i])
|
|
||||||
|
|
||||||
# Check bbox overlap (simple coordinate comparison)
|
|
||||||
x_overlap <- !(tile_extent$xmax < field_bbox$xmin || tile_extent$xmin > field_bbox$xmax)
|
|
||||||
y_overlap <- !(tile_extent$ymax < field_bbox$ymin || tile_extent$ymin > field_bbox$ymax)
|
|
||||||
|
|
||||||
if (x_overlap && y_overlap) {
|
|
||||||
return(TRUE) # Found overlap!
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return(FALSE) # No overlap found
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
|
|
||||||
return(TRUE) # Default to including tile if there's an error
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
cat("\n[2] Checking TIFF extents...\n")
|
|
||||||
|
|
||||||
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
|
|
||||||
tiff_files <- sort(tiff_files)
|
|
||||||
|
|
||||||
# Filter by date range if specified
|
|
||||||
if (!is.null(start_date) || !is.null(end_date)) {
|
|
||||||
cat("\nApplying date filter...\n")
|
|
||||||
|
|
||||||
file_dates <- as.Date(sub("\\.tif$", "", tiff_files))
|
|
||||||
|
|
||||||
if (!is.null(start_date) && !is.null(end_date)) {
|
|
||||||
keep_idx <- file_dates >= start_date & file_dates <= end_date
|
|
||||||
cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
|
|
||||||
} else if (!is.null(start_date)) {
|
|
||||||
keep_idx <- file_dates >= start_date
|
|
||||||
cat(" From: ", as.character(start_date), "\n", sep = "")
|
|
||||||
} else {
|
|
||||||
keep_idx <- file_dates <= end_date
|
|
||||||
cat(" Until: ", as.character(end_date), "\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
tiff_files <- tiff_files[keep_idx]
|
|
||||||
cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(tiff_files) == 0) {
|
|
||||||
stop("No TIFF files found in ", TIFF_FOLDER)
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
|
|
||||||
cat(" Checking extents... (this may take a while)\n")
|
|
||||||
|
|
||||||
# Load all extents - ONE TIME, upfront
|
|
||||||
extents <- list()
|
|
||||||
for (i in seq_along(tiff_files)) {
|
|
||||||
tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
|
|
||||||
raster <- terra::rast(tiff_path)
|
|
||||||
ext <- terra::ext(raster)
|
|
||||||
extents[[i]] <- ext
|
|
||||||
|
|
||||||
# Progress indicator every 50 files
|
|
||||||
if (i %% 50 == 0) {
|
|
||||||
cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(" ✓ All extents loaded\n")
|
|
||||||
|
|
||||||
# Check if all extents match
|
|
||||||
cat("\n[3] Comparing extents...\n")
|
|
||||||
|
|
||||||
tolerance <- 1e-8
|
|
||||||
all_match <- TRUE
|
|
||||||
first_ext <- extents[[1]]
|
|
||||||
|
|
||||||
for (i in 2:length(extents)) {
|
|
||||||
curr_ext <- extents[[i]]
|
|
||||||
match <- (
|
|
||||||
abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
|
|
||||||
abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
|
|
||||||
abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
|
|
||||||
abs(curr_ext$ymax - first_ext$ymax) < tolerance
|
|
||||||
)
|
|
||||||
if (!match) {
|
|
||||||
all_match <- FALSE
|
|
||||||
cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
|
|
||||||
cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
|
|
||||||
"Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
|
|
||||||
cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
|
|
||||||
"Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (all_match) {
|
|
||||||
cat(" ✓ All TIFF extents MATCH perfectly!\n")
|
|
||||||
} else {
|
|
||||||
cat(" ⚠️ Extents differ - creating master extent covering all\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create master extent
|
|
||||||
cat("\n[4] Creating master extent...\n")
|
|
||||||
|
|
||||||
master_xmin <- min(sapply(extents, function(e) e$xmin))
|
|
||||||
master_xmax <- max(sapply(extents, function(e) e$xmax))
|
|
||||||
master_ymin <- min(sapply(extents, function(e) e$ymin))
|
|
||||||
master_ymax <- max(sapply(extents, function(e) e$ymax))
|
|
||||||
|
|
||||||
x_range_m <- (master_xmax - master_xmin) * 111320
|
|
||||||
y_range_m <- (master_ymax - master_ymin) * 111320
|
|
||||||
|
|
||||||
cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
|
|
||||||
"Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
|
|
||||||
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")
|
|
||||||
|
|
||||||
# Auto-determine grid size based on ROI dimensions
|
|
||||||
if (x_range_m < 10000 && y_range_m < 10000) {
|
|
||||||
cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
|
|
||||||
GRID_NROWS <- 1
|
|
||||||
GRID_NCOLS <- 1
|
|
||||||
} else {
|
|
||||||
cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n")
|
|
||||||
GRID_NROWS <- 5
|
|
||||||
GRID_NCOLS <- 5
|
|
||||||
}
|
|
||||||
|
|
||||||
N_TILES <- GRID_NROWS * GRID_NCOLS
|
|
||||||
|
|
||||||
# Check if master grid already exists
|
|
||||||
cat("\n[5] Checking if master grid exists...\n")
|
|
||||||
|
|
||||||
master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
|
|
||||||
|
|
||||||
if (file.exists(master_grid_file)) {
|
|
||||||
cat(" ✓ Master grid exists! Loading existing grid...\n")
|
|
||||||
master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
|
|
||||||
master_grid_vect <- terra::vect(master_grid_file)
|
|
||||||
cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
|
|
||||||
} else {
|
|
||||||
cat(" Grid does not exist. Creating new master grid...\n")
|
|
||||||
|
|
||||||
# Create 5×5 grid
|
|
||||||
cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
|
|
||||||
|
|
||||||
master_bbox <- st_bbox(c(
|
|
||||||
xmin = master_xmin,
|
|
||||||
xmax = master_xmax,
|
|
||||||
ymin = master_ymin,
|
|
||||||
ymax = master_ymax
|
|
||||||
), crs = 4326)
|
|
||||||
|
|
||||||
bbox_sf <- st_as_sfc(master_bbox)
|
|
||||||
|
|
||||||
master_grid <- st_make_grid(
|
|
||||||
bbox_sf,
|
|
||||||
n = c(GRID_NCOLS, GRID_NROWS),
|
|
||||||
what = "polygons"
|
|
||||||
)
|
|
||||||
|
|
||||||
master_grid_sf <- st_sf(
|
|
||||||
tile_id = sprintf("%02d", 1:length(master_grid)),
|
|
||||||
geometry = master_grid
|
|
||||||
)
|
|
||||||
|
|
||||||
cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")
|
|
||||||
|
|
||||||
# Convert to SpatVector for use in makeTiles
|
|
||||||
master_grid_vect <- terra::vect(master_grid_sf)
|
|
||||||
|
|
||||||
# Save master grid
|
|
||||||
if (!dir.exists(OUTPUT_FOLDER)) {
|
|
||||||
dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
|
|
||||||
}
|
|
||||||
st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
|
|
||||||
cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
|
|
||||||
|
|
||||||
# If grid was loaded from file, it's already filtered. Skip filtering.
|
|
||||||
if (!file.exists(MASTER_GRID_PATH)) {
|
|
||||||
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
|
|
||||||
|
|
||||||
# Check which tiles overlap with any field
|
|
||||||
overlapping_tile_indices <- c()
|
|
||||||
for (tile_idx in 1:nrow(master_grid_sf)) {
|
|
||||||
tile_geom <- master_grid_sf[tile_idx, ]
|
|
||||||
|
|
||||||
# Check overlap with any field
|
|
||||||
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
|
|
||||||
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
|
|
||||||
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
|
|
||||||
|
|
||||||
# Create filtered grid with only overlapping tiles
|
|
||||||
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
|
|
||||||
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
|
|
||||||
} else {
|
|
||||||
cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
|
|
||||||
# Grid was already loaded - it's already filtered
|
|
||||||
filtered_grid_sf <- master_grid_sf
|
|
||||||
}
|
|
||||||
|
|
||||||
# Convert to SpatVector for makeTiles
|
|
||||||
filtered_grid_vect <- terra::vect(filtered_grid_sf)
|
|
||||||
|
|
||||||
cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
|
|
||||||
cat("\n[8] Processing each date independently...\n")
|
|
||||||
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")
|
|
||||||
|
|
||||||
total_tiles_created <- 0
|
|
||||||
dates_skipped <- 0
|
|
||||||
dates_processed <- 0
|
|
||||||
|
|
||||||
for (file_idx in seq_along(tiff_files)) {
|
|
||||||
tiff_file <- tiff_files[file_idx]
|
|
||||||
date_str <- gsub("\\.tif$", "", tiff_file)
|
|
||||||
|
|
||||||
# Create date-specific output folder
|
|
||||||
date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
|
|
||||||
|
|
||||||
# CHECK: Skip if date already processed (RESUME-SAFE)
|
|
||||||
if (dir.exists(date_output_folder)) {
|
|
||||||
existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
|
|
||||||
existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]
|
|
||||||
|
|
||||||
if (length(existing_tiles) > 0) {
|
|
||||||
cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
|
|
||||||
" (", length(existing_tiles), " tiles already exist)\n", sep = "")
|
|
||||||
dates_skipped <- dates_skipped + 1
|
|
||||||
next # Skip this date
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
|
|
||||||
dates_processed <- dates_processed + 1
|
|
||||||
|
|
||||||
# Load TIFF for this date only
|
|
||||||
tiff_path <- file.path(TIFF_FOLDER, tiff_file)
|
|
||||||
raster <- terra::rast(tiff_path)
|
|
||||||
|
|
||||||
dims <- dim(raster)
|
|
||||||
cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
|
|
||||||
|
|
||||||
# Create date-specific output folder
|
|
||||||
if (!dir.exists(date_output_folder)) {
|
|
||||||
dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")
|
|
||||||
|
|
||||||
# Use makeTiles with FILTERED grid (only overlapping tiles)
|
|
||||||
tiles_list <- terra::makeTiles(
|
|
||||||
x = raster,
|
|
||||||
y = filtered_grid_vect,
|
|
||||||
filename = file.path(date_output_folder, "tile.tif"),
|
|
||||||
overwrite = TRUE
|
|
||||||
)
|
|
||||||
|
|
||||||
# Rename tiles to [DATE]_[TILE_ID].tif
|
|
||||||
for (tile_idx in seq_along(tiles_list)) {
|
|
||||||
source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
|
|
||||||
tile_id <- filtered_grid_sf$tile_id[tile_idx]
|
|
||||||
final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
|
|
||||||
|
|
||||||
if (file.exists(source_file)) {
|
|
||||||
file.rename(source_file, final_file)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
|
|
||||||
total_tiles_created <- total_tiles_created + length(tiles_list)
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# VERIFICATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
cat("\n[9] Verifying output...\n")
|
|
||||||
|
|
||||||
# Count tiles per date folder
|
|
||||||
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
|
|
||||||
date_folders <- sort(date_folders[date_folders != "."])
|
|
||||||
|
|
||||||
total_tile_files <- 0
|
|
||||||
for (date_folder in date_folders) {
|
|
||||||
tiles_in_folder <- list.files(file.path(OUTPUT_FOLDER, date_folder),
|
|
||||||
pattern = "\\.tif$")
|
|
||||||
tiles_in_folder <- tiles_in_folder[!grepl("master_grid", tiles_in_folder)]
|
|
||||||
total_tile_files <- total_tile_files + length(tiles_in_folder)
|
|
||||||
cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# SUMMARY
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
cat("\n\n========== SUMMARY ==========\n")
|
|
||||||
|
|
||||||
cat("\nGrid Configuration:\n")
|
|
||||||
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
|
|
||||||
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
|
||||||
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
|
|
||||||
|
|
||||||
cat("\nField Filtering:\n")
|
|
||||||
cat(" - Field boundaries loaded from pivot.geojson\n")
|
|
||||||
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
|
|
||||||
cat(" - Significant storage savings for sparse fields!\n")
|
|
||||||
|
|
||||||
cat("\nProcessing Summary:\n")
|
|
||||||
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
|
|
||||||
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
|
|
||||||
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
|
|
||||||
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
|
|
||||||
if (dates_processed > 0) {
|
|
||||||
avg_tiles_per_date <- total_tiles_created / dates_processed
|
|
||||||
cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
|
|
||||||
}
|
|
||||||
|
|
||||||
cat("\nDirectory Structure:\n")
|
|
||||||
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
|
|
||||||
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
|
|
||||||
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
|
|
||||||
cat(" ├── 2024-01-15/\n")
|
|
||||||
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
|
|
||||||
cat(" │ ├── 2024-01-15_05.tif\n")
|
|
||||||
cat(" │ └── ...\n")
|
|
||||||
cat(" ├── 2024-01-16/\n")
|
|
||||||
cat(" │ └── ...\n")
|
|
||||||
cat(" └── ...\n")
|
|
||||||
|
|
||||||
cat("\n⭐ Key Benefits:\n")
|
|
||||||
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
|
|
||||||
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
|
|
||||||
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
|
|
||||||
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# WRITE TILING CONFIGURATION METADATA
|
|
||||||
# ============================================================================
|
|
||||||
# This metadata file is read by parameters_project.R to determine mosaic mode
|
|
||||||
# It allows script 40 to know what script 10 decided without re-computing
|
|
||||||
|
|
||||||
cat("\n[10] Writing tiling configuration metadata...\n")
|
|
||||||
|
|
||||||
config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")
|
|
||||||
config_json <- paste0(
|
|
||||||
'{\n',
|
|
||||||
' "project": "', PROJECT, '",\n',
|
|
||||||
' "has_tiles": ', tolower(N_TILES > 1), ',\n',
|
|
||||||
' "grid_size": "', GRID_SIZE_LABEL, '",\n',
|
|
||||||
' "grid_rows": ', GRID_NROWS, ',\n',
|
|
||||||
' "grid_cols": ', GRID_NCOLS, ',\n',
|
|
||||||
' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n',
|
|
||||||
' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n',
|
|
||||||
' "created_date": "', Sys.Date(), '",\n',
|
|
||||||
' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n',
|
|
||||||
'}\n'
|
|
||||||
)
|
|
||||||
|
|
||||||
writeLines(config_json, config_file)
|
|
||||||
cat(" ✓ Metadata saved to: tiling_config.json\n")
|
|
||||||
cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "")
|
|
||||||
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")
|
|
||||||
|
|
||||||
cat("\n✓ Script complete!\n")
|
|
||||||
|
|
@ -68,7 +68,14 @@ main <- function() {
|
||||||
setwd("..")
|
setwd("..")
|
||||||
}
|
}
|
||||||
|
|
||||||
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
|
# STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
|
||||||
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
|
project_dir <- if (length(args) == 0) "angata" else args[1]
|
||||||
|
|
||||||
|
# Make project_dir available to sourced files (they execute in global scope)
|
||||||
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
|
|
||||||
|
# STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
|
||||||
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
|
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source("r_app/parameters_project.R")
|
source("r_app/parameters_project.R")
|
||||||
|
|
@ -85,12 +92,31 @@ main <- function() {
|
||||||
stop(e)
|
stop(e)
|
||||||
})
|
})
|
||||||
|
|
||||||
# STEP 3: Parse command-line arguments
|
# STEP 4: Set default date parameters (can be overridden by pipeline runner via assign())
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
# These control which dates Script 10 processes from merged_tif/
|
||||||
project_dir <- if (length(args) == 0) "angata" else args[1]
|
# Window: end_date - offset days to end_date
|
||||||
|
# Always coerce to correct types to avoid issues with lingering/inherited values
|
||||||
|
if (!exists("end_date") || !inherits(end_date, "Date")) {
|
||||||
|
end_date <- as.Date("2026-02-04")
|
||||||
|
safe_log(paste("Using default end_date:", end_date), "INFO")
|
||||||
|
}
|
||||||
|
if (!exists("offset") || !is.numeric(offset)) {
|
||||||
|
offset <- 7
|
||||||
|
safe_log(paste("Using default offset:", offset, "days"), "INFO")
|
||||||
|
}
|
||||||
|
|
||||||
# STEP 4: Now all utilities are loaded, proceed with script logic
|
# Ensure offset is numeric (in case it came in as a character string from environment)
|
||||||
# Load centralized path structure (creates all directories automatically)
|
if (is.character(offset)) {
|
||||||
|
offset <- as.numeric(offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate date window for processing
|
||||||
|
start_date <- end_date - offset
|
||||||
|
date_window <- seq(start_date, end_date, by = "day")
|
||||||
|
date_window_str <- format(date_window, "%Y-%m-%d")
|
||||||
|
safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")
|
||||||
|
|
||||||
|
# STEP 5: Load centralized path structure (creates all directories automatically)
|
||||||
paths <- setup_project_directories(project_dir)
|
paths <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
safe_log(paste("Project:", project_dir))
|
safe_log(paste("Project:", project_dir))
|
||||||
|
|
@ -109,7 +135,9 @@ main <- function() {
|
||||||
|
|
||||||
# PHASE 1: Process new downloads (always runs)
|
# PHASE 1: Process new downloads (always runs)
|
||||||
# Pass field_tiles_ci_dir so it can skip dates already migrated
|
# Pass field_tiles_ci_dir so it can skip dates already migrated
|
||||||
process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir)
|
# Also pass end_date and offset so only dates in window are processed
|
||||||
|
process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
|
||||||
|
end_date = end_date, offset = offset)
|
||||||
|
|
||||||
safe_log("\n========================================", "INFO")
|
safe_log("\n========================================", "INFO")
|
||||||
safe_log("FINAL SUMMARY", "INFO")
|
safe_log("FINAL SUMMARY", "INFO")
|
||||||
|
|
|
||||||
|
|
@ -156,6 +156,11 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
|
||||||
#' TIFFs are stored. If provided, skips dates
|
#' TIFFs are stored. If provided, skips dates
|
||||||
#' already processed and moved to field_tiles_CI/.
|
#' already processed and moved to field_tiles_CI/.
|
||||||
#' Default: NULL (process all dates).
|
#' Default: NULL (process all dates).
|
||||||
|
#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD).
|
||||||
|
#' Default: NULL (process all available dates).
|
||||||
|
#' @param offset Integer. Optional. Number of days to look back from end_date.
|
||||||
|
#' Only used if end_date is also provided.
|
||||||
|
#' Default: NULL (process all available dates).
|
||||||
#'
|
#'
|
||||||
#' @return List with elements:
|
#' @return List with elements:
|
||||||
#' - total_created: Integer. Total field TIFFs created across all dates
|
#' - total_created: Integer. Total field TIFFs created across all dates
|
||||||
|
|
@ -187,7 +192,8 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
|
||||||
#' result$total_created, result$total_skipped, result$total_errors))
|
#' result$total_created, result$total_skipped, result$total_errors))
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) {
|
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL,
|
||||||
|
end_date = NULL, offset = NULL) {
|
||||||
|
|
||||||
safe_log("\n========================================", "INFO")
|
safe_log("\n========================================", "INFO")
|
||||||
safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
|
safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
|
||||||
|
|
@ -211,6 +217,19 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel
|
||||||
full.names = TRUE
|
full.names = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# FILTER by date window if end_date and offset provided
|
||||||
|
if (!is.null(end_date) && !is.null(offset)) {
|
||||||
|
start_date <- end_date - offset
|
||||||
|
date_range <- seq(start_date, end_date, by = "day")
|
||||||
|
date_range_str <- format(date_range, "%Y-%m-%d")
|
||||||
|
|
||||||
|
# Extract dates from filenames and filter
|
||||||
|
tiff_dates <- gsub("\\.tif$", "", basename(tiff_files))
|
||||||
|
tiff_files <- tiff_files[tiff_dates %in% date_range_str]
|
||||||
|
|
||||||
|
safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO")
|
||||||
|
}
|
||||||
|
|
||||||
safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")
|
safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")
|
||||||
|
|
||||||
if (length(tiff_files) == 0) {
|
if (length(tiff_files) == 0) {
|
||||||
|
|
@ -226,7 +245,7 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel
|
||||||
for (tif_path in tiff_files) {
|
for (tif_path in tiff_files) {
|
||||||
tif_date <- gsub("\\.tif$", "", basename(tif_path))
|
tif_date <- gsub("\\.tif$", "", basename(tif_path))
|
||||||
|
|
||||||
# MIGRATION MODE CHECK: Skip if this date was already migrated to field_tiles_CI/
|
# CHECK 1: Skip if this date was already migrated to field_tiles_CI/
|
||||||
# (This means Script 20 already processed it and extracted RDS)
|
# (This means Script 20 already processed it and extracted RDS)
|
||||||
if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) {
|
if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) {
|
||||||
# Check if ANY field has this date in field_tiles_CI/
|
# Check if ANY field has this date in field_tiles_CI/
|
||||||
|
|
@ -249,6 +268,28 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# CHECK 2: Skip if this date already exists in field_tiles/
|
||||||
|
# (means this date has already been processed through Script 10)
|
||||||
|
if (dir.exists(field_tiles_dir)) {
|
||||||
|
date_exists_in_field_tiles <- FALSE
|
||||||
|
|
||||||
|
# Check if ANY field directory has this date
|
||||||
|
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
|
||||||
|
for (field_dir in field_dirs) {
|
||||||
|
potential_file <- file.path(field_dir, paste0(tif_date, ".tif"))
|
||||||
|
if (file.exists(potential_file)) {
|
||||||
|
date_exists_in_field_tiles <- TRUE
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (date_exists_in_field_tiles) {
|
||||||
|
safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO")
|
||||||
|
total_skipped <- total_skipped + 1
|
||||||
|
next
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
safe_log(paste("Processing:", tif_date), "INFO")
|
safe_log(paste("Processing:", tif_date), "INFO")
|
||||||
|
|
||||||
result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
|
result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
|
||||||
|
|
|
||||||
|
|
@ -1,366 +0,0 @@
|
||||||
# ============================================================================
|
|
||||||
# SCRIPT 20: Canopy Index (CI) Extraction from Satellite Imagery
|
|
||||||
# ============================================================================
|
|
||||||
# PURPOSE:
|
|
||||||
# Extract Canopy Index (CI) from 4-band or 8-band satellite imagery and
|
|
||||||
# mask by field boundaries. Supports automatic band detection, cloud masking
|
|
||||||
# with UDM2 (8-band), and per-field CI value extraction. Produces both
|
|
||||||
# per-field TIFFs and consolidated CI statistics for growth model input.
|
|
||||||
#
|
|
||||||
# INPUT DATA:
|
|
||||||
# - Source: laravel_app/storage/app/{project}/field_tiles/{FIELD}/{DATE}.tif
|
|
||||||
# - Format: GeoTIFF (4-band RGB+NIR from Planet API, or 8-band with UDM2)
|
|
||||||
# - Requirement: Field boundaries (pivot.geojson) for masking
|
|
||||||
#
|
|
||||||
# OUTPUT DATA:
|
|
||||||
# - Destination: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif
|
|
||||||
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32)
|
|
||||||
# - Also exports: combined_CI/combined_CI_data.rds (wide format: fields × dates)
|
|
||||||
#
|
|
||||||
# USAGE:
|
|
||||||
# Rscript 20_ci_extraction.R [end_date] [offset] [project] [data_source]
|
|
||||||
#
|
|
||||||
# Example (Windows PowerShell):
|
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif
|
|
||||||
#
|
|
||||||
# PARAMETERS:
|
|
||||||
# - end_date: End date for processing (character, YYYY-MM-DD format)
|
|
||||||
# - offset: Days to look back from end_date (numeric, default 7)
|
|
||||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
|
||||||
# - data_source: Data source directory (character, optional) - "merged_tif" (default), "merged_tif_8b", "merged_final_tif"
|
|
||||||
#
|
|
||||||
# CLIENT TYPES:
|
|
||||||
# - cane_supply (ANGATA): Yes - core data processing
|
|
||||||
# - agronomic_support (AURA): Yes - supports field health monitoring
|
|
||||||
#
|
|
||||||
# DEPENDENCIES:
|
|
||||||
# - Packages: terra, sf, tidyverse, lubridate, readxl, furrr, future
|
|
||||||
# - Utils files: parameters_project.R, 00_common_utils.R, 20_ci_extraction_utils.R
|
|
||||||
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
|
|
||||||
# - Data directories: field_tiles/, field_tiles_CI/, combined_CI/
|
|
||||||
#
|
|
||||||
# NOTES:
|
|
||||||
# - CI formula: (NIR - Red) / (NIR + Red); normalized to 0-5 range
|
|
||||||
# - 8-band data automatically cloud-masked using UDM2 (band 7-8)
|
|
||||||
# - 4-band data assumes clear-sky Planet PSScene imagery
|
|
||||||
# - Parallel processing via furrr for speed optimization
|
|
||||||
# - Output RDS uses wide format (fields as rows, dates as columns) for growth model
|
|
||||||
# - Critical dependency for Script 30 (growth model) and Script 80 (KPIs)
|
|
||||||
#
|
|
||||||
# RELATED ISSUES:
|
|
||||||
# SC-112: Utilities restructuring
|
|
||||||
# SC-108: Core pipeline improvements
|
|
||||||
#
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
# 1. Load required packages
|
|
||||||
# -----------------------
|
|
||||||
suppressPackageStartupMessages({
|
|
||||||
# Spatial data handling
|
|
||||||
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
|
||||||
library(terra) # For raster operations (CI extraction from TIFFs)
|
|
||||||
|
|
||||||
# Data manipulation
|
|
||||||
library(tidyverse) # For dplyr, ggplot2, readr (data wrangling and visualization)
|
|
||||||
library(lubridate) # For date/time operations (parsing satellite dates)
|
|
||||||
|
|
||||||
# File I/O
|
|
||||||
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
|
|
||||||
library(here) # For relative path resolution (platform-independent file paths)
|
|
||||||
})
|
|
||||||
|
|
||||||
# 2. Process command line arguments
|
|
||||||
# ------------------------------
|
|
||||||
main <- function() {
|
|
||||||
# Capture command line arguments
|
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
|
||||||
|
|
||||||
# Process end_date argument
|
|
||||||
if (length(args) >= 1 && !is.na(args[1]) && args[1] != "") {
|
|
||||||
# Parse date explicitly in YYYY-MM-DD format from command line
|
|
||||||
end_date <- as.Date(args[1], format = "%Y-%m-%d")
|
|
||||||
if (is.na(end_date)) {
|
|
||||||
warning("Invalid end_date provided. Using default (current date).")
|
|
||||||
end_date <- Sys.Date()
|
|
||||||
#end_date <- "2023-10-01"
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
end_date <- Sys.Date()
|
|
||||||
#end_date <- "2023-10-01"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process offset argument
|
|
||||||
if (length(args) >= 2 && !is.na(args[2])) {
|
|
||||||
offset <- as.numeric(args[2])
|
|
||||||
if (is.na(offset) || offset <= 0) {
|
|
||||||
warning("Invalid offset provided. Using default (7 days).")
|
|
||||||
offset <- 7
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
offset <- 7
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process project_dir argument
|
|
||||||
if (length(args) >= 3 && !is.na(args[3])) {
|
|
||||||
project_dir <- as.character(args[3])
|
|
||||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
|
||||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
|
||||||
} else {
|
|
||||||
project_dir <- "angata" # Changed default from "aura" to "esa"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif)
|
|
||||||
if (length(args) >= 4 && !is.na(args[4])) {
|
|
||||||
data_source <- as.character(args[4])
|
|
||||||
# Validate data_source is a recognized option
|
|
||||||
if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) {
|
|
||||||
warning(paste("Data source", data_source, "not in standard list. Using as-is."))
|
|
||||||
}
|
|
||||||
} else if (exists("data_source", envir = .GlobalEnv)) {
|
|
||||||
data_source <- get("data_source", envir = .GlobalEnv)
|
|
||||||
} else {
|
|
||||||
data_source <- "merged_tif_8b" # Default to 8-band (newer data with cloud masking)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Make project_dir and data_source available globally
|
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n",
|
|
||||||
project_dir, format(end_date, "%Y-%m-%d"), offset, data_source))
|
|
||||||
|
|
||||||
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
|
|
||||||
ci_extraction_script <- TRUE
|
|
||||||
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
# 3. Initialize project configuration
|
|
||||||
# --------------------------------
|
|
||||||
new_project_question <- TRUE
|
|
||||||
|
|
||||||
cat("[DEBUG] Attempting to source r_app/parameters_project.R\n")
|
|
||||||
tryCatch({
|
|
||||||
source("r_app/parameters_project.R")
|
|
||||||
cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n")
|
|
||||||
}, error = function(e) {
|
|
||||||
cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n")
|
|
||||||
stop(e)
|
|
||||||
})
|
|
||||||
|
|
||||||
# Load centralized path structure (creates all directories automatically)
|
|
||||||
paths <- setup_project_directories(project_dir)
|
|
||||||
|
|
||||||
cat("[DEBUG] Attempting to source r_app/00_common_utils.R\n")
|
|
||||||
tryCatch({
|
|
||||||
source("r_app/00_common_utils.R")
|
|
||||||
cat("[DEBUG] Successfully sourced r_app/00_common_utils.R\n")
|
|
||||||
}, error = function(e) {
|
|
||||||
cat("[ERROR] Failed to source r_app/00_common_utils.R:\n", e$message, "\n")
|
|
||||||
stop(e)
|
|
||||||
})
|
|
||||||
|
|
||||||
cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n")
|
|
||||||
tryCatch({
|
|
||||||
source("r_app/20_ci_extraction_utils.R")
|
|
||||||
cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n")
|
|
||||||
}, error = function(e) {
|
|
||||||
cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n")
|
|
||||||
stop(e)
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
# 4. Generate date list for processing
|
|
||||||
# ---------------------------------
|
|
||||||
dates <- date_list(end_date, offset)
|
|
||||||
log_message(paste("Processing data for week", dates$week, "of", dates$year))
|
|
||||||
|
|
||||||
# 4a. CHECK DAILY CI EXTRACTION - Skip dates that already have extracted files
|
|
||||||
# -------------------------------------------------------------------------
|
|
||||||
log_message("\n===== CHECKING DAILY CI EXTRACTION STATUS =====")
|
|
||||||
|
|
||||||
# Check which dates already have extracted CI files
|
|
||||||
already_extracted <- c()
|
|
||||||
missing_extraction <- c()
|
|
||||||
|
|
||||||
if (dir.exists(daily_CI_vals_dir)) {
|
|
||||||
existing_ci_files <- list.files(daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$")
|
|
||||||
# Extract dates from filenames like "extracted_2025-12-31_quadrant.rds"
|
|
||||||
already_extracted <- sub("^extracted_(.+)_.*\\.rds$", "\\1", existing_ci_files)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Find which dates in our processing range need extraction
|
|
||||||
missing_extraction <- dates$days_filter[!(dates$days_filter %in% already_extracted)]
|
|
||||||
|
|
||||||
cat(sprintf("[CI CHECK] Already extracted: %d dates\n", length(already_extracted)))
|
|
||||||
cat(sprintf("[CI CHECK] Need extraction: %d dates (from %s to %s)\n",
|
|
||||||
length(missing_extraction),
|
|
||||||
if(length(missing_extraction) > 0) min(missing_extraction) else "N/A",
|
|
||||||
if(length(missing_extraction) > 0) max(missing_extraction) else "N/A"))
|
|
||||||
|
|
||||||
# If any dates need extraction, we'll extract them
|
|
||||||
# If NO dates need extraction, we'll skip extraction but ALWAYS rebuild combined_CI_data.rds
|
|
||||||
skip_extraction <- (length(missing_extraction) == 0)
|
|
||||||
|
|
||||||
if (skip_extraction) {
|
|
||||||
log_message("✓ All dates in processing range already have extracted CI files - skipping extraction")
|
|
||||||
log_message("⚠ Will rebuild combined_CI_data.rds to ensure completeness")
|
|
||||||
}
|
|
||||||
|
|
||||||
# 4b. CHECK SOURCE DATA AVAILABILITY
|
|
||||||
# ---------------------------------------------------------------
|
|
||||||
# Verify that source data exists for dates we're going to extract
|
|
||||||
# If a date is missing from source, we'll skip it gracefully
|
|
||||||
log_message("\n===== CHECKING SOURCE DATA AVAILABILITY =====")
|
|
||||||
|
|
||||||
dates_with_source <- c()
|
|
||||||
dates_missing_source <- c()
|
|
||||||
|
|
||||||
if (!skip_extraction && length(missing_extraction) > 0) {
|
|
||||||
# Check which source dates are actually available
|
|
||||||
for (date_str in missing_extraction) {
|
|
||||||
# Look for the date in merged_tif directory
|
|
||||||
source_file_pattern <- sprintf("%s\\.tif$", date_str)
|
|
||||||
files_for_date <- list.files(planet_tif_folder, pattern = source_file_pattern)
|
|
||||||
|
|
||||||
if (length(files_for_date) > 0) {
|
|
||||||
dates_with_source <- c(dates_with_source, date_str)
|
|
||||||
} else {
|
|
||||||
dates_missing_source <- c(dates_missing_source, date_str)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cat(sprintf("[SOURCE CHECK] Dates with available source data: %d\n", length(dates_with_source)))
|
|
||||||
cat(sprintf("[SOURCE CHECK] Dates missing from source (will skip): %d\n", length(dates_missing_source)))
|
|
||||||
|
|
||||||
if (length(dates_missing_source) > 0) {
|
|
||||||
log_message(paste("⚠ Skipping extraction for missing source dates:", paste(dates_missing_source, collapse = ", ")))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# 5. Find and filter raster files by date - with grid size detection
|
|
||||||
# -----------------------------------
|
|
||||||
log_message("Searching for raster files")
|
|
||||||
|
|
||||||
# Check if tiles exist (Script 10 output) - detect grid size dynamically using centralized paths
|
|
||||||
tiles_split_base <- paths$daily_tiles_split_dir
|
|
||||||
|
|
||||||
# Detect grid size from daily_tiles_split folder structure
|
|
||||||
# Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc.
|
|
||||||
grid_size <- NA
|
|
||||||
if (dir.exists(tiles_split_base)) {
|
|
||||||
subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
|
|
||||||
# Look for grid size patterns like "5x5", "10x10", "20x20"
|
|
||||||
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
|
||||||
if (length(grid_patterns) > 0) {
|
|
||||||
grid_size <- grid_patterns[1] # Use first grid size found
|
|
||||||
log_message(paste("Detected grid size:", grid_size))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Construct tile folder path with grid size
|
|
||||||
if (!is.na(grid_size)) {
|
|
||||||
tile_folder <- file.path(tiles_split_base, grid_size)
|
|
||||||
} else {
|
|
||||||
tile_folder <- tiles_split_base
|
|
||||||
}
|
|
||||||
|
|
||||||
use_tiles <- dir.exists(tile_folder)
|
|
||||||
|
|
||||||
# Make grid_size available globally for other functions
|
|
||||||
assign("grid_size", grid_size, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
if (skip_extraction) {
|
|
||||||
log_message("\n===== SKIPPING CI EXTRACTION (all dates already processed) =====")
|
|
||||||
} else if (use_tiles) {
|
|
||||||
# Use tile-based processing
|
|
||||||
log_message(paste("Tile folder detected at", tile_folder))
|
|
||||||
log_message("Using tile-based CI extraction")
|
|
||||||
|
|
||||||
# Call the tile-based extraction function
|
|
||||||
process_ci_values_from_tiles(
|
|
||||||
dates = dates,
|
|
||||||
tile_folder = tile_folder,
|
|
||||||
field_boundaries = field_boundaries,
|
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
|
||||||
daily_CI_vals_dir = daily_CI_vals_dir,
|
|
||||||
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
|
|
||||||
merged_final_dir = merged_final,
|
|
||||||
grid_size = grid_size
|
|
||||||
)
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# Use legacy full-extent processing
|
|
||||||
log_message("No tiles found. Using legacy full-extent approach")
|
|
||||||
|
|
||||||
# Use the existing utility function to find satellite images
|
|
||||||
existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
|
|
||||||
log_message(paste("Found", length(existing_files), "raster files for processing"))
|
|
||||||
|
|
||||||
# Process raster files and create VRT
|
|
||||||
vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
|
|
||||||
|
|
||||||
# Process and combine CI values
|
|
||||||
process_ci_values(dates, field_boundaries, merged_final,
|
|
||||||
field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
log_message(paste("Error in main processing:", e$message), level = "ERROR")
|
|
||||||
stop(e$message)
|
|
||||||
})
|
|
||||||
|
|
||||||
# 6. REBUILD combined_CI_data.rds from ALL daily extracted files
|
|
||||||
# -----------------------------------------------
|
|
||||||
# This ensures the combined file is complete and up-to-date
|
|
||||||
# even if extraction was skipped (because dates already existed)
|
|
||||||
# NOTE: Only rebuild if new dates were successfully extracted
|
|
||||||
# If all dates were missing from source, skip this step to avoid corrupting the file
|
|
||||||
log_message("\n===== HANDLING combined_CI_data.rds =====")
|
|
||||||
|
|
||||||
if (length(dates_with_source) == 0 && length(missing_extraction) > 0) {
|
|
||||||
# All missing dates had no source data - skip combined_CI_data.rds update
|
|
||||||
log_message("⚠ No new dates extracted (all source data missing) - skipping combined_CI_data.rds update")
|
|
||||||
} else if (skip_extraction) {
|
|
||||||
# All dates already extracted - optionally rebuild for consistency
|
|
||||||
log_message("✓ All dates already extracted - combined_CI_data.rds is up-to-date")
|
|
||||||
} else {
|
|
||||||
# New dates were extracted - rebuild combined_CI_data.rds from ALL daily files
|
|
||||||
log_message("Rebuilding combined_CI_data.rds from all daily extracted files...")
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
if (!dir.exists(daily_CI_vals_dir)) {
|
|
||||||
log_message("Daily CI directory does not exist yet", level = "WARNING")
|
|
||||||
} else {
|
|
||||||
# List ALL daily CI files (not just new ones)
|
|
||||||
all_daily_files <- list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(all_daily_files) == 0) {
|
|
||||||
log_message("No daily CI files found to combine", level = "WARNING")
|
|
||||||
} else {
|
|
||||||
log_message(paste("Combining all", length(all_daily_files), "daily CI files into combined_CI_data.rds"))
|
|
||||||
|
|
||||||
# Load and combine ALL daily files (creates complete dataset)
|
|
||||||
combined_ci_path <- file.path(paths$cumulative_ci_vals_dir, "combined_CI_data.rds")
|
|
||||||
|
|
||||||
combined_data <- all_daily_files %>%
|
|
||||||
purrr::map(readRDS) %>%
|
|
||||||
purrr::list_rbind() %>%
|
|
||||||
dplyr::group_by(sub_field)
|
|
||||||
|
|
||||||
# Save the rebuilt combined data
|
|
||||||
saveRDS(combined_data, combined_ci_path)
|
|
||||||
|
|
||||||
log_message(paste("✓ Rebuilt combined_CI_data.rds with", nrow(combined_data), "total rows"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
log_message(paste("⚠ Error rebuilding combined_CI_data.rds (will skip):", e$message), level = "WARNING")
|
|
||||||
log_message(" Note: This is OK - Script 30 will use growth model RDS instead", level = "WARNING")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sys.nframe() == 0) {
|
|
||||||
main()
|
|
||||||
}
|
|
||||||
|
|
@ -80,9 +80,18 @@ main <- function() {
|
||||||
})
|
})
|
||||||
|
|
||||||
# Get list of dates to process
|
# Get list of dates to process
|
||||||
dates <- date_list(end_date, offset)
|
# If in migration mode, dates_to_process is provided by the pipeline runner
|
||||||
safe_log(sprintf("Processing dates: %s to %s (%d dates)",
|
if (exists("dates_to_process") && !is.null(dates_to_process)) {
|
||||||
dates$start_date, dates$end_date, length(dates$days_filter)))
|
# Migration mode: Use provided list of dates (process ALL available dates)
|
||||||
|
dates_filter <- sort(dates_to_process)
|
||||||
|
safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
|
||||||
|
} else {
|
||||||
|
# Normal mode: Use 7-day offset window
|
||||||
|
dates <- date_list(end_date, offset)
|
||||||
|
dates_filter <- dates$days_filter
|
||||||
|
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
|
||||||
|
dates$start_date, dates$end_date, length(dates_filter)))
|
||||||
|
}
|
||||||
|
|
||||||
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
|
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
|
||||||
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
|
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
|
||||||
|
|
@ -123,7 +132,7 @@ main <- function() {
|
||||||
total_error <- 0
|
total_error <- 0
|
||||||
ci_results_by_date <- list()
|
ci_results_by_date <- list()
|
||||||
|
|
||||||
for (date_str in dates$days_filter) {
|
for (date_str in dates_filter) {
|
||||||
# Load the merged TIFF ONCE for this date
|
# Load the merged TIFF ONCE for this date
|
||||||
merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str))
|
merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,9 @@
|
||||||
# models and Python ML workflows without requiring interpolated/modeled values.
|
# models and Python ML workflows without requiring interpolated/modeled values.
|
||||||
#
|
#
|
||||||
# INPUT DATA:
|
# INPUT DATA:
|
||||||
# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
|
# - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
# - Format: RDS (wide format: fields × dates with CI values)
|
# - Format: RDS (interpolated growth model data from Script 30)
|
||||||
# - Requirement: Script 20 must have completed CI extraction
|
# - Requirement: Script 30 must have completed growth model interpolation
|
||||||
#
|
#
|
||||||
# OUTPUT DATA:
|
# OUTPUT DATA:
|
||||||
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||||
|
|
@ -36,12 +36,12 @@
|
||||||
# - Data directories: extracted_ci/cumulative_vals/
|
# - Data directories: extracted_ci/cumulative_vals/
|
||||||
#
|
#
|
||||||
# NOTES:
|
# NOTES:
|
||||||
# - Transformation: Wide format (fields as rows, dates as columns) → Long format
|
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||||
# - Time series: Preserves all CI values without interpolation
|
# - Handles both wide format and long format inputs from growth model
|
||||||
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
||||||
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||||
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||||
# - Optional: Run only when exporting to Python for ML model training
|
# - Exports complete growth curves with interpolated values for ML training
|
||||||
#
|
#
|
||||||
# RELATED ISSUES:
|
# RELATED ISSUES:
|
||||||
# SC-112: Utilities restructuring
|
# SC-112: Utilities restructuring
|
||||||
|
|
@ -199,39 +199,56 @@ main <- function() {
|
||||||
ci_data_source_dir <- paths$cumulative_ci_vals_dir
|
ci_data_source_dir <- paths$cumulative_ci_vals_dir
|
||||||
ci_data_output_dir <- paths$ci_for_python_dir
|
ci_data_output_dir <- paths$ci_for_python_dir
|
||||||
|
|
||||||
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds")
|
# Try to load interpolated growth model data from Script 30
|
||||||
|
input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
||||||
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
|
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
|
||||||
|
|
||||||
# Check if input file exists
|
# Check if input file exists
|
||||||
if (!file.exists(input_file)) {
|
if (!file.exists(input_file)) {
|
||||||
stop(paste("Input file not found:", input_file))
|
stop(paste("Input file not found:", input_file,
|
||||||
|
"\nScript 30 (growth model) must be run before Script 21."))
|
||||||
}
|
}
|
||||||
|
|
||||||
cat(sprintf("Loading: %s\n", input_file))
|
cat(sprintf("Loading: %s\n", input_file))
|
||||||
|
|
||||||
# Load RDS file
|
# Load RDS file (from Script 30 - already in long format with interpolated values)
|
||||||
ci_data_wide <- readRDS(input_file) %>%
|
ci_data <- readRDS(input_file) %>%
|
||||||
as_tibble()
|
as_tibble()
|
||||||
|
|
||||||
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide)))
|
cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
|
||||||
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n"))
|
cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
|
||||||
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
|
|
||||||
|
|
||||||
# Step 1: Convert from WIDE to LONG format
|
# Check format and prepare for export
|
||||||
cat("\nStep 1: Converting from wide to long format...\n")
|
# If it's already in long format (from Script 30), use as-is
|
||||||
ci_data_long <- wide_to_long_ci_data(ci_data_wide)
|
# Otherwise, convert from wide to long
|
||||||
|
if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) {
|
||||||
|
cat(" Detected: LONG format (from growth model)\n")
|
||||||
|
ci_data_long <- ci_data
|
||||||
|
} else {
|
||||||
|
cat(" Detected: WIDE format - converting to long...\n")
|
||||||
|
ci_data_long <- wide_to_long_ci_data(ci_data)
|
||||||
|
}
|
||||||
|
|
||||||
# Step 2: Create complete daily sequences with interpolation
|
# Step 1: Ensure Date column exists and is properly formatted
|
||||||
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
|
ci_data_long <- ci_data_long %>%
|
||||||
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
|
mutate(
|
||||||
|
Date = as.Date(Date)
|
||||||
|
)
|
||||||
|
|
||||||
# Step 3: Validate output
|
# Step 2: If interpolated values already present, use them; otherwise create interpolated sequences
|
||||||
cat("\nStep 3: Validating output...")
|
if ("value" %in% names(ci_data_long)) {
|
||||||
validate_conversion_output(ci_data_python)
|
# Already has interpolated values from Script 30
|
||||||
|
cat("\nStep 2: Using interpolated values from growth model...\n")
|
||||||
|
ci_data_python <- ci_data_long
|
||||||
|
} else {
|
||||||
|
# Create interpolated daily sequences
|
||||||
|
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
|
||||||
|
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
|
||||||
|
}
|
||||||
|
|
||||||
# Step 4: Save to CSV
|
# Step 4: Save to CSV
|
||||||
cat(sprintf("\nStep 4: Saving to CSV...\n"))
|
cat(sprintf("\nStep 4: Saving to CSV...\\n"))
|
||||||
cat(sprintf(" Output: %s\n", output_file))
|
cat(sprintf(" Output: %s\\n", output_file))
|
||||||
write_csv(ci_data_python, output_file)
|
write_csv(ci_data_python, output_file)
|
||||||
|
|
||||||
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
|
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
|
||||||
|
|
|
||||||
|
|
@ -115,15 +115,16 @@ load_combined_ci_data <- function(daily_vals_dir) {
|
||||||
#' @param harvesting_data Dataframe with harvesting information
|
#' @param harvesting_data Dataframe with harvesting information
|
||||||
#' @param field_CI_data Dataframe with CI measurements
|
#' @param field_CI_data Dataframe with CI measurements
|
||||||
#' @param season Year of the growing season
|
#' @param season Year of the growing season
|
||||||
|
#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration.
|
||||||
#' @return Dataframe with interpolated daily CI values
|
#' @return Dataframe with interpolated daily CI values
|
||||||
#'
|
#'
|
||||||
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) {
|
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) {
|
||||||
# Filter harvesting data for the given season and field name
|
# Filter harvesting data for the given season and field name
|
||||||
filtered_harvesting_data <- harvesting_data %>%
|
filtered_harvesting_data <- harvesting_data %>%
|
||||||
dplyr::filter(year == season, sub_field == field_name)
|
dplyr::filter(year == season, sub_field == field_name)
|
||||||
|
|
||||||
if (nrow(filtered_harvesting_data) == 0) {
|
if (nrow(filtered_harvesting_data) == 0) {
|
||||||
safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
|
if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -133,7 +134,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
|
|
||||||
# Return an empty data frame if no CI data is found
|
# Return an empty data frame if no CI data is found
|
||||||
if (nrow(filtered_field_CI_data) == 0) {
|
if (nrow(filtered_field_CI_data) == 0) {
|
||||||
safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
|
if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -157,12 +158,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
|
|
||||||
# If CI is empty after filtering, return an empty dataframe
|
# If CI is empty after filtering, return an empty dataframe
|
||||||
if (nrow(CI) == 0) {
|
if (nrow(CI) == 0) {
|
||||||
safe_log(paste0("No CI data within season dates for field: ", field_name,
|
if (verbose) {
|
||||||
" (Season: ", season, ", dates: ",
|
safe_log(paste0("No CI data within season dates for field: ", field_name,
|
||||||
format(season_start, "%Y-%m-%d"), " to ",
|
" (Season: ", season, ", dates: ",
|
||||||
format(season_end, "%Y-%m-%d"),
|
format(season_start, "%Y-%m-%d"), " to ",
|
||||||
"). Available CI data range: ", ci_date_range),
|
format(season_end, "%Y-%m-%d"),
|
||||||
"WARNING")
|
"). Available CI data range: ", ci_date_range),
|
||||||
|
"WARNING")
|
||||||
|
}
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -175,20 +178,17 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
subField = field_name
|
subField = field_name
|
||||||
)
|
)
|
||||||
|
|
||||||
# Log successful interpolation
|
# Return data with success status
|
||||||
safe_log(paste0("Successfully interpolated CI data for field: ", field_name,
|
|
||||||
" (Season: ", season, ", dates: ",
|
|
||||||
format(season_start, "%Y-%m-%d"), " to ",
|
|
||||||
format(season_end, "%Y-%m-%d"),
|
|
||||||
"). ", nrow(CI), " data points created."))
|
|
||||||
|
|
||||||
return(CI)
|
return(CI)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
safe_log(paste0("Error interpolating CI data for field ", field_name,
|
# Return empty dataframe on error (will be tracked separately)
|
||||||
" in season ", season,
|
if (verbose) {
|
||||||
" (", format(season_start, "%Y-%m-%d"), " to ",
|
safe_log(paste0("Error interpolating CI data for field ", field_name,
|
||||||
format(season_end, "%Y-%m-%d"),
|
" in season ", season,
|
||||||
"): ", e$message), "ERROR")
|
" (", format(season_start, "%Y-%m-%d"), " to ",
|
||||||
|
format(season_end, "%Y-%m-%d"),
|
||||||
|
"): ", e$message), "ERROR")
|
||||||
|
}
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -203,17 +203,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
|
||||||
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
||||||
safe_log("Starting CI data interpolation for all fields")
|
safe_log("Starting CI data interpolation for all fields")
|
||||||
|
|
||||||
|
# Track failed fields for end-of-run summary
|
||||||
|
failed_fields <- list()
|
||||||
|
total_fields <- 0
|
||||||
|
successful_fields <- 0
|
||||||
|
|
||||||
# Process each year
|
# Process each year
|
||||||
result <- purrr::map_df(years, function(yr) {
|
result <- purrr::map_df(years, function(yr) {
|
||||||
safe_log(paste("Processing year:", yr))
|
|
||||||
|
|
||||||
# Get the fields harvested in this year with valid season start dates
|
# Get the fields harvested in this year with valid season start dates
|
||||||
sub_fields <- harvesting_data %>%
|
sub_fields <- harvesting_data %>%
|
||||||
dplyr::filter(year == yr, !is.na(season_start)) %>%
|
dplyr::filter(year == yr, !is.na(season_start)) %>%
|
||||||
dplyr::pull(sub_field)
|
dplyr::pull(sub_field)
|
||||||
|
|
||||||
if (length(sub_fields) == 0) {
|
if (length(sub_fields) == 0) {
|
||||||
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
|
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -222,24 +224,64 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
||||||
purrr::keep(~ any(ci_data$sub_field == .x))
|
purrr::keep(~ any(ci_data$sub_field == .x))
|
||||||
|
|
||||||
if (length(valid_sub_fields) == 0) {
|
if (length(valid_sub_fields) == 0) {
|
||||||
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
|
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract and interpolate data for each valid field
|
# Initialize progress bar for this year
|
||||||
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
|
total_fields <<- total_fields + length(valid_sub_fields)
|
||||||
|
pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50)
|
||||||
|
counter <- 0
|
||||||
|
|
||||||
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
|
# Extract and interpolate data for each valid field with progress bar
|
||||||
harvesting_data = harvesting_data,
|
result_list <- list()
|
||||||
field_CI_data = ci_data,
|
for (field in valid_sub_fields) {
|
||||||
season = yr)) %>%
|
counter <- counter + 1
|
||||||
purrr::list_rbind()
|
setTxtProgressBar(pb, counter)
|
||||||
|
|
||||||
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
|
# Call with verbose=FALSE to suppress warnings during progress bar iteration
|
||||||
return(result)
|
field_result <- extract_CI_data(field,
|
||||||
|
harvesting_data = harvesting_data,
|
||||||
|
field_CI_data = ci_data,
|
||||||
|
season = yr,
|
||||||
|
verbose = FALSE)
|
||||||
|
|
||||||
|
if (nrow(field_result) > 0) {
|
||||||
|
successful_fields <<- successful_fields + 1
|
||||||
|
result_list[[field]] <- field_result
|
||||||
|
} else {
|
||||||
|
# Track failed field
|
||||||
|
failed_fields[[length(failed_fields) + 1]] <<- list(
|
||||||
|
field = field,
|
||||||
|
season = yr,
|
||||||
|
reason = "Unable to generate interpolated data"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(pb)
|
||||||
|
cat("\n") # Newline after progress bar
|
||||||
|
|
||||||
|
# Combine all results for this year
|
||||||
|
if (length(result_list) > 0) {
|
||||||
|
purrr::list_rbind(result_list)
|
||||||
|
} else {
|
||||||
|
data.frame()
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
safe_log(paste("Total interpolated data points:", nrow(result)))
|
# Print summary
|
||||||
|
safe_log(sprintf("\n=== Interpolation Summary ==="))
|
||||||
|
safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields))
|
||||||
|
|
||||||
|
if (length(failed_fields) > 0) {
|
||||||
|
safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields)))
|
||||||
|
for (failure in failed_fields) {
|
||||||
|
safe_log(sprintf(" - Field %s (Season %d): %s",
|
||||||
|
failure$field, failure$season, failure$reason), "WARNING")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("Total interpolated data points: %d", nrow(result)))
|
||||||
|
|
||||||
return(result)
|
return(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,296 +0,0 @@
|
||||||
# ============================================================================
|
|
||||||
# SCRIPT 40: Weekly Mosaic Creation (CI Band Aggregation)
|
|
||||||
# ============================================================================
|
|
||||||
# PURPOSE:
|
|
||||||
# Create weekly 5-band (R, G, B, NIR, CI) mosaics from daily satellite
|
|
||||||
# imagery. Aggregates multi-day CI data into single weekly composite raster
|
|
||||||
# for field-level analysis. Supports per-field or single-file architectures.
|
|
||||||
#
|
|
||||||
# INPUT DATA:
|
|
||||||
# - Daily per-field TIFFs: laravel_app/storage/app/{project}/daily_tiles/{YYYY-MM-DD}/*.tif
|
|
||||||
# (or single-file mosaics: merged_tif/{YYYY-MM-DD}.tif + pivot.geojson masking)
|
|
||||||
# - CI data (RDS): laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
|
|
||||||
# - Field boundaries: laravel_app/storage/app/{project}/pivot.geojson
|
|
||||||
#
|
|
||||||
# OUTPUT DATA:
|
|
||||||
# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/
|
|
||||||
# - Format: 5-band GeoTIFF (uint16)
|
|
||||||
# - Naming: week_{WW}.tif (week number + year, e.g., week_35_2025.tif)
|
|
||||||
# - Spatial: Raster aligned to field boundaries; CRS preserved
|
|
||||||
#
|
|
||||||
# USAGE:
|
|
||||||
# Rscript 40_mosaic_creation.R [end_date] [offset] [project] [file_name] [data_source]
|
|
||||||
#
|
|
||||||
# Example (Windows PowerShell):
|
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 aura
|
|
||||||
#
|
|
||||||
# PARAMETERS:
|
|
||||||
# - end_date: End date (YYYY-MM-DD format); required for weekly aggregation
|
|
||||||
# - offset: Days to look back (typically 7 for one week)
|
|
||||||
# - project: Project name (aura, angata, chemba, xinavane, esa, simba)
|
|
||||||
# - file_name: Custom output filename (optional; default: week_{WW}_{YYYY}.tif)
|
|
||||||
# - data_source: Data folder (optional; auto-detects merged_tif or merged_tif_8b)
|
|
||||||
#
|
|
||||||
# CLIENT TYPES:
|
|
||||||
# - cane_supply (ANGATA): Yes - harvest readiness timeline depends on weekly mosaic
|
|
||||||
# - agronomic_support (AURA): Yes - KPI calculation requires weekly CI bands
|
|
||||||
#
|
|
||||||
# DEPENDENCIES:
|
|
||||||
# - Packages: sf, terra, tidyverse, lubridate, here
|
|
||||||
# - Utils files: parameters_project.R, 00_common_utils.R, 40_mosaic_creation_utils.R
|
|
||||||
# - Input data: Daily per-field TIFFs (Script 10) + CI extraction (Script 20)
|
|
||||||
# - Data: field boundaries (pivot.geojson), harvest dates (if available)
|
|
||||||
#
|
|
||||||
# NOTES:
|
|
||||||
# - Weekly aggregation: Combines 7 days of daily data into single composite
|
|
||||||
# - 5-band output: R, G, B, NIR, and Canopy Index (CI) derived from NDVI
|
|
||||||
# - Tiling support: Handles per-field TIFF architecture; auto-mosaics if needed
|
|
||||||
# - Data source auto-detection: Searches merged_tif/ or merged_tif_8b/ folders
|
|
||||||
# - Command-line driven: Designed for batch scheduling (cron/Task Scheduler)
|
|
||||||
# - Downstream: Script 80 (KPI calculation) depends on weekly_mosaic/ output
|
|
||||||
# - Performance: Multi-file mosaicing (~25 fields) takes 5-10 minutes per week
|
|
||||||
#
|
|
||||||
# RELATED ISSUES:
|
|
||||||
# SC-113: Script header standardization
|
|
||||||
# SC-112: Utilities restructuring
|
|
||||||
# SC-111: Script 10 geometry validation
|
|
||||||
#
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# 1. Load required packages
|
|
||||||
# -----------------------
|
|
||||||
suppressPackageStartupMessages({
|
|
||||||
# File path handling
|
|
||||||
library(here) # For relative path resolution (platform-independent file paths)
|
|
||||||
|
|
||||||
# Spatial data handling
|
|
||||||
library(sf) # For spatial operations (field boundary masking)
|
|
||||||
library(terra) # For raster operations (reading/writing/stacking GeoTIFFs)
|
|
||||||
|
|
||||||
# Data manipulation
|
|
||||||
library(tidyverse) # For dplyr, readr (data wrangling)
|
|
||||||
library(lubridate) # For date/time operations (week extraction, date formatting)
|
|
||||||
})
|
|
||||||
|
|
||||||
# 2. Process command line arguments and run mosaic creation
|
|
||||||
# ------------------------------------------------------
|
|
||||||
main <- function() {
|
|
||||||
# Capture command line arguments
|
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
|
||||||
|
|
||||||
# Process project_dir argument with default
|
|
||||||
if (length(args) >= 3 && !is.na(args[3])) {
|
|
||||||
project_dir <- as.character(args[3])
|
|
||||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
|
||||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
|
||||||
} else {
|
|
||||||
# Default project directory
|
|
||||||
project_dir <- "angata"
|
|
||||||
message("No project_dir provided. Using default:", project_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Make project_dir available globally so parameters_project.R can use it
|
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
# Process end_date argument with default
|
|
||||||
if (length(args) >= 1 && !is.na(args[1])) {
|
|
||||||
# Parse date explicitly in YYYY-MM-DD format from command line
|
|
||||||
end_date <- as.Date(args[1], format = "%Y-%m-%d")
|
|
||||||
if (is.na(end_date)) {
|
|
||||||
message("Invalid end_date provided. Using current date.")
|
|
||||||
end_date <- Sys.Date()
|
|
||||||
}
|
|
||||||
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
|
||||||
end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
|
|
||||||
} else {
|
|
||||||
# Default to current date if no argument is provided
|
|
||||||
end_date <- Sys.Date()
|
|
||||||
message("No end_date provided. Using current date: ", format(end_date))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process offset argument with default
|
|
||||||
if (length(args) >= 2 && !is.na(args[2])) {
|
|
||||||
offset <- as.numeric(args[2])
|
|
||||||
if (is.na(offset) || offset <= 0) {
|
|
||||||
message("Invalid offset provided. Using default (7 days).")
|
|
||||||
offset <- 7
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
# Default to 7 days if no argument is provided
|
|
||||||
offset <- 7
|
|
||||||
message("No offset provided. Using default:", offset, "days")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process data_source argument (optional, passed from pipeline)
|
|
||||||
# If provided, use it; otherwise auto-detect
|
|
||||||
data_source_from_args <- NULL
|
|
||||||
if (length(args) >= 5 && !is.na(args[5]) && nchar(args[5]) > 0) {
|
|
||||||
data_source_from_args <- as.character(args[5])
|
|
||||||
message("Data source explicitly provided via arguments: ", data_source_from_args)
|
|
||||||
}
|
|
||||||
|
|
||||||
# 3. Initialize project configuration
|
|
||||||
# --------------------------------
|
|
||||||
|
|
||||||
# Detect which data source directory exists (merged_tif or merged_tif_8b)
|
|
||||||
# IMPORTANT: Only consider a folder as valid if it contains actual files
|
|
||||||
laravel_storage <- here::here("laravel_app/storage/app", project_dir)
|
|
||||||
|
|
||||||
# Load centralized path structure
|
|
||||||
tryCatch({
|
|
||||||
source("r_app/parameters_project.R")
|
|
||||||
paths <- setup_project_directories(project_dir)
|
|
||||||
}, error = function(e) {
|
|
||||||
message("Note: Could not open files from r_app directory")
|
|
||||||
message("Attempting to source from default directory instead...")
|
|
||||||
tryCatch({
|
|
||||||
source("parameters_project.R")
|
|
||||||
paths <- setup_project_directories(project_dir)
|
|
||||||
message("✓ Successfully sourced files from default directory")
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Failed to source required files from both 'r_app' and default directories.")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
data_source <- if (has_8b_data) {
|
|
||||||
message("Auto-detected data source: merged_tif_8b (8-band optimized) - contains files")
|
|
||||||
"merged_tif_8b"
|
|
||||||
} else if (has_legacy_data) {
|
|
||||||
message("Auto-detected data source: merged_tif (legacy 4-band) - contains files")
|
|
||||||
"merged_tif"
|
|
||||||
} else {
|
|
||||||
message("Warning: No valid data source found (both folders empty or missing). Using default: merged_tif")
|
|
||||||
"merged_tif"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Set global data_source for parameters_project.R
|
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
source("r_app/parameters_project.R")
|
|
||||||
source("r_app/00_common_utils.R")
|
|
||||||
source("r_app/40_mosaic_creation_utils.R")
|
|
||||||
safe_log(paste("Successfully sourced files from 'r_app' directory."))
|
|
||||||
}, error = function(e) {
|
|
||||||
message("Note: Could not open files from r_app directory")
|
|
||||||
message("Attempting to source from default directory instead...")
|
|
||||||
tryCatch({
|
|
||||||
source("parameters_project.R")
|
|
||||||
paths <- setup_project_directories(project_dir)
|
|
||||||
message("✓ Successfully sourced files from default directory")
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Failed to source required files from both 'r_app' and default directories.")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
# Use centralized paths (no need to manually construct or create dirs)
|
|
||||||
merged_final <- paths$growth_model_interpolated_dir # or merged_final_tif if needed
|
|
||||||
daily_vrt <- paths$vrt_dir
|
|
||||||
|
|
||||||
safe_log(paste("Using growth model/mosaic directory:", merged_final))
|
|
||||||
safe_log(paste("Using daily VRT directory:", daily_vrt))
|
|
||||||
|
|
||||||
# 4. Generate date range for processing
|
|
||||||
# ---------------------------------
|
|
||||||
dates <- date_list(end_date, offset)
|
|
||||||
safe_log(paste("Processing data for week", dates$week, "of", dates$year))
|
|
||||||
|
|
||||||
# Create output filename
|
|
||||||
# Only use custom filename if explicitly provided (not empty string)
|
|
||||||
file_name_tif <- if (length(args) >= 4 && !is.na(args[4]) && nchar(args[4]) > 0) {
|
|
||||||
as.character(args[4])
|
|
||||||
} else {
|
|
||||||
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
|
|
||||||
}
|
|
||||||
|
|
||||||
safe_log(paste("Output will be saved as:", file_name_tif))
|
|
||||||
|
|
||||||
# 5. Create weekly mosaics - route based on project tile detection
|
|
||||||
# ---------------------------------------------------------------
|
|
||||||
# The use_tile_mosaic flag is auto-detected by parameters_project.R
|
|
||||||
# based on whether tiles exist in merged_final_tif/
|
|
||||||
|
|
||||||
if (!exists("use_tile_mosaic")) {
|
|
||||||
# Fallback detection if flag not set (shouldn't happen)
|
|
||||||
merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
|
|
||||||
tile_detection <- detect_tile_structure_from_merged_final(merged_final_dir)
|
|
||||||
use_tile_mosaic <- tile_detection$has_tiles
|
|
||||||
}
|
|
||||||
|
|
||||||
if (use_tile_mosaic) {
|
|
||||||
# TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
|
|
||||||
# This is used for projects like Angata with large ROIs requiring spatial partitioning
|
|
||||||
# Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20)
|
|
||||||
tryCatch({
|
|
||||||
safe_log("Starting per-tile mosaic creation (tile-based approach)...")
|
|
||||||
|
|
||||||
# Detect grid size from merged_final_tif folder structure
|
|
||||||
# Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc.
|
|
||||||
merged_final_base <- file.path(laravel_storage, "merged_final_tif")
|
|
||||||
grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE)
|
|
||||||
# Look for grid size patterns like "5x5", "10x10", "20x20"
|
|
||||||
grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
|
|
||||||
|
|
||||||
if (length(grid_patterns) == 0) {
|
|
||||||
stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)")
|
|
||||||
}
|
|
||||||
|
|
||||||
grid_size <- grid_patterns[1] # Use first grid size found
|
|
||||||
safe_log(paste("Detected grid size:", grid_size))
|
|
||||||
|
|
||||||
# Point to the grid-specific merged_final_tif directory
|
|
||||||
merged_final_with_grid <- file.path(merged_final_base, grid_size)
|
|
||||||
|
|
||||||
# Set output directory for per-tile mosaics, organized by grid size (from centralized paths)
|
|
||||||
# Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif
|
|
||||||
tile_output_base <- file.path(paths$weekly_tile_max_dir, grid_size)
|
|
||||||
# Note: no dir.create needed - paths$weekly_tile_max_dir already created by setup_project_directories()
|
|
||||||
dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE) # Create grid-size subfolder
|
|
||||||
|
|
||||||
created_tile_files <- create_weekly_mosaic_from_tiles(
|
|
||||||
dates = dates,
|
|
||||||
merged_final_dir = merged_final_with_grid,
|
|
||||||
tile_output_dir = tile_output_base,
|
|
||||||
field_boundaries = field_boundaries
|
|
||||||
)
|
|
||||||
|
|
||||||
safe_log(paste("✓ Per-tile mosaic creation completed - created",
|
|
||||||
length(created_tile_files), "tile files"))
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
|
|
||||||
traceback()
|
|
||||||
stop("Mosaic creation failed")
|
|
||||||
})
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# SINGLE-FILE APPROACH: Create single weekly mosaic file
|
|
||||||
# This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output
|
|
||||||
tryCatch({
|
|
||||||
safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
|
|
||||||
|
|
||||||
# Set output directory for single-file mosaics (from centralized paths)
|
|
||||||
single_file_output_dir <- paths$weekly_mosaic_dir
|
|
||||||
|
|
||||||
created_file <- create_weekly_mosaic(
|
|
||||||
dates = dates,
|
|
||||||
field_boundaries = field_boundaries,
|
|
||||||
daily_vrt_dir = daily_vrt,
|
|
||||||
merged_final_dir = merged_final,
|
|
||||||
output_dir = single_file_output_dir,
|
|
||||||
file_name_tif = file_name_tif,
|
|
||||||
create_plots = FALSE
|
|
||||||
)
|
|
||||||
|
|
||||||
safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
|
|
||||||
traceback()
|
|
||||||
stop("Mosaic creation failed")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sys.nframe() == 0) {
|
|
||||||
main()
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -165,6 +165,13 @@ main <- function() {
|
||||||
|
|
||||||
dates <- date_list(end_date, offset)
|
dates <- date_list(end_date, offset)
|
||||||
|
|
||||||
|
# Validate week calculation
|
||||||
|
message(sprintf("[INFO] Requested offset: %d days", offset))
|
||||||
|
message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d")))
|
||||||
|
message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d")))
|
||||||
|
message(sprintf("[INFO] Calculating ISO week: %d", dates$week))
|
||||||
|
message(sprintf("[INFO] Calculating ISO year: %d", dates$year))
|
||||||
|
|
||||||
# ==== Create Per-Field Weekly Mosaics ====
|
# ==== Create Per-Field Weekly Mosaics ====
|
||||||
|
|
||||||
created_files <- create_all_field_weekly_mosaics(
|
created_files <- create_all_field_weekly_mosaics(
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,15 @@ date_list <- function(end_date, offset) {
|
||||||
week <- lubridate::isoweek(end_date)
|
week <- lubridate::isoweek(end_date)
|
||||||
year <- lubridate::isoyear(end_date)
|
year <- lubridate::isoyear(end_date)
|
||||||
|
|
||||||
|
# Validate: Check that all dates in range belong to same ISO week
|
||||||
|
start_week <- lubridate::isoweek(start_date)
|
||||||
|
start_year <- lubridate::isoyear(start_date)
|
||||||
|
|
||||||
|
if (start_week != week || start_year != year) {
|
||||||
|
safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. Using END date week %d/%d.",
|
||||||
|
start_week, start_year, week, year, week, year), "WARNING")
|
||||||
|
}
|
||||||
|
|
||||||
days_filter <- seq(from = start_date, to = end_date, by = "day")
|
days_filter <- seq(from = start_date, to = end_date, by = "day")
|
||||||
days_filter <- format(days_filter, "%Y-%m-%d")
|
days_filter <- format(days_filter, "%Y-%m-%d")
|
||||||
|
|
||||||
|
|
@ -117,7 +126,6 @@ find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) {
|
||||||
create_field_weekly_composite <- function(tiff_files, field_name) {
|
create_field_weekly_composite <- function(tiff_files, field_name) {
|
||||||
|
|
||||||
if (length(tiff_files) == 0) {
|
if (length(tiff_files) == 0) {
|
||||||
safe_log(paste("No TIFF files for field:", field_name), "WARNING")
|
|
||||||
return(NULL)
|
return(NULL)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -129,35 +137,30 @@ create_field_weekly_composite <- function(tiff_files, field_name) {
|
||||||
r <- terra::rast(file)
|
r <- terra::rast(file)
|
||||||
rasters[[length(rasters) + 1]] <- r
|
rasters[[length(rasters) + 1]] <- r
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
safe_log(paste("Warning: Could not load", basename(file), "for field", field_name), "WARNING")
|
# Silently skip load errors (they're already counted)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
if (length(rasters) == 0) {
|
if (length(rasters) == 0) {
|
||||||
safe_log(paste("Failed to load any rasters for field:", field_name), "ERROR")
|
|
||||||
return(NULL)
|
return(NULL)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create MAX composite
|
# Create MAX composite
|
||||||
if (length(rasters) == 1) {
|
if (length(rasters) == 1) {
|
||||||
composite <- rasters[[1]]
|
composite <- rasters[[1]]
|
||||||
safe_log(paste(" Field", field_name, "- single day (no compositing needed)"))
|
|
||||||
} else {
|
} else {
|
||||||
# Stack all rasters and apply MAX per pixel per band
|
# Stack all rasters and apply MAX per pixel per band
|
||||||
collection <- terra::sprc(rasters)
|
collection <- terra::sprc(rasters)
|
||||||
composite <- terra::mosaic(collection, fun = "max")
|
composite <- terra::mosaic(collection, fun = "max")
|
||||||
safe_log(paste(" Field", field_name, "- MAX composite from", length(rasters), "days"))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ensure 5 bands with expected names
|
# Ensure 5 bands with expected names
|
||||||
if (terra::nlyr(composite) >= 5) {
|
if (terra::nlyr(composite) >= 5) {
|
||||||
composite <- terra::subset(composite, 1:5)
|
composite <- terra::subset(composite, 1:5)
|
||||||
names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
|
names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||||
} else {
|
|
||||||
safe_log(paste("Warning: Field", field_name, "has", terra::nlyr(composite),
|
|
||||||
"bands (expected 5)"), "WARNING")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return(composite)
|
return(composite)
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
|
|
@ -190,11 +193,9 @@ save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year)
|
||||||
filename <- sprintf("week_%02d_%04d.tif", week, year)
|
filename <- sprintf("week_%02d_%04d.tif", week, year)
|
||||||
file_path <- file.path(field_output_dir, filename)
|
file_path <- file.path(field_output_dir, filename)
|
||||||
|
|
||||||
# Save raster
|
# Save raster (silently)
|
||||||
terra::writeRaster(raster, file_path, overwrite = TRUE)
|
terra::writeRaster(raster, file_path, overwrite = TRUE)
|
||||||
|
|
||||||
safe_log(paste(" Saved:", basename(field_output_dir), "/", filename))
|
|
||||||
|
|
||||||
return(file_path)
|
return(file_path)
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
|
|
@ -229,8 +230,13 @@ create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_di
|
||||||
|
|
||||||
created_files <- character()
|
created_files <- character()
|
||||||
|
|
||||||
|
# Initialize progress bar
|
||||||
|
pb <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50)
|
||||||
|
counter <- 0
|
||||||
|
|
||||||
# Process each field
|
# Process each field
|
||||||
for (field_name in names(field_tiffs)) {
|
for (field_name in names(field_tiffs)) {
|
||||||
|
counter <- counter + 1
|
||||||
tiff_files <- field_tiffs[[field_name]]
|
tiff_files <- field_tiffs[[field_name]]
|
||||||
|
|
||||||
# Create composite
|
# Create composite
|
||||||
|
|
@ -250,8 +256,12 @@ create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_di
|
||||||
created_files <- c(created_files, saved_path)
|
created_files <- c(created_files, saved_path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setTxtProgressBar(pb, counter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
close(pb)
|
||||||
|
cat("\n") # New line after progress bar
|
||||||
safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))
|
safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))
|
||||||
|
|
||||||
return(created_files)
|
return(created_files)
|
||||||
|
|
|
||||||
|
|
@ -48,10 +48,6 @@
|
||||||
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
|
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
|
||||||
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
|
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
|
||||||
#
|
#
|
||||||
# RELATED ISSUES:
|
|
||||||
# SC-112: Script 80 utilities restructuring (common + client-aware modules)
|
|
||||||
# SC-108: Core pipeline improvements
|
|
||||||
# SC-100: KPI definition and formula documentation
|
|
||||||
#
|
#
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# [✓] Extract planting dates per field
|
# [✓] Extract planting dates per field
|
||||||
|
|
@ -320,7 +316,6 @@ main <- function() {
|
||||||
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
|
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
|
||||||
|
|
||||||
# Use centralized paths from setup object (no need for file.path calls)
|
# Use centralized paths from setup object (no need for file.path calls)
|
||||||
weekly_tile_max <- setup$weekly_tile_max_dir
|
|
||||||
weekly_mosaic <- setup$weekly_mosaic_dir
|
weekly_mosaic <- setup$weekly_mosaic_dir
|
||||||
daily_vals_dir <- setup$daily_ci_vals_dir
|
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||||
|
|
||||||
|
|
@ -394,96 +389,66 @@ main <- function() {
|
||||||
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||||
message(strrep("=", 70))
|
message(strrep("=", 70))
|
||||||
|
|
||||||
|
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
|
||||||
|
reports_dir <- setup$kpi_reports_dir
|
||||||
|
data_dir <- setup$data_dir
|
||||||
|
|
||||||
# Continue with existing per-field analysis code below
|
# Continue with existing per-field analysis code below
|
||||||
|
|
||||||
message("\n", strrep("-", 70))
|
message("\n", strrep("-", 70))
|
||||||
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
|
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
||||||
message(strrep("-", 70))
|
message(strrep("-", 70))
|
||||||
current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53)
|
current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53)
|
||||||
year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming
|
year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming
|
||||||
|
|
||||||
# Calculate previous week using authoritative helper (handles year boundaries correctly)
|
# Calculate previous week using authoritative helper (handles year boundaries correctly)
|
||||||
source("r_app/80_weekly_stats_utils.R") # Load helper function
|
# Function already loaded from 80_utils_common.R sourced earlier
|
||||||
previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1)
|
previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1)
|
||||||
previous_week <- previous_info$week
|
previous_week <- previous_info$week
|
||||||
previous_year <- previous_info$year
|
previous_year <- previous_info$year
|
||||||
|
|
||||||
message(paste("Week:", current_week, "/ Year (ISO):", year))
|
message(paste("Week:", current_week, "/ Year (ISO):", year))
|
||||||
|
|
||||||
# Find mosaic files - support both tile-based AND single-file approaches
|
# Find per-field weekly mosaics
|
||||||
message("Finding mosaic files...")
|
message("Finding per-field weekly mosaics...")
|
||||||
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year)
|
|
||||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
||||||
|
|
||||||
# PRIORITY 1: Check for tile-based mosaics (projects with large ROI)
|
if (!dir.exists(weekly_mosaic)) {
|
||||||
detected_grid_size <- NA
|
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
||||||
mosaic_dir <- NA
|
"\nScript 40 (mosaic creation) must be run first."))
|
||||||
mosaic_mode <- NA
|
}
|
||||||
|
|
||||||
if (dir.exists(weekly_tile_max)) {
|
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
||||||
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
|
field_dirs <- field_dirs[field_dirs != ""]
|
||||||
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
|
||||||
if (length(grid_patterns) > 0) {
|
|
||||||
detected_grid_size <- grid_patterns[1]
|
|
||||||
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
|
|
||||||
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(tile_files) > 0) {
|
if (length(field_dirs) == 0) {
|
||||||
message(paste(" ✓ Using tile-based approach (grid-size:", detected_grid_size, ")"))
|
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
||||||
message(paste(" Found", length(tile_files), "tiles"))
|
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
||||||
mosaic_mode <- "tiled"
|
}
|
||||||
}
|
|
||||||
|
# Verify we have mosaics for this week
|
||||||
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
||||||
|
per_field_files <- c()
|
||||||
|
for (field in field_dirs) {
|
||||||
|
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
||||||
|
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
||||||
|
if (length(files) > 0) {
|
||||||
|
per_field_files <- c(per_field_files, files)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# PRIORITY 2: Check for per-field mosaics (NEW per-field architecture)
|
if (length(per_field_files) == 0) {
|
||||||
if (is.na(mosaic_mode)) {
|
stop(paste("ERROR: No mosaics found for week", current_week, "year", year,
|
||||||
message(" No tiles found. Checking for per-field mosaics...")
|
"\nExpected pattern:", single_file_pattern,
|
||||||
# Check if weekly_mosaic has field subdirectories
|
"\nChecked:", weekly_mosaic))
|
||||||
if (dir.exists(weekly_mosaic)) {
|
|
||||||
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
|
||||||
field_dirs <- field_dirs[field_dirs != ""]
|
|
||||||
|
|
||||||
if (length(field_dirs) > 0) {
|
|
||||||
# Check if any field has the week pattern we're looking for
|
|
||||||
per_field_files <- c()
|
|
||||||
for (field in field_dirs) {
|
|
||||||
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
|
||||||
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
|
||||||
if (length(files) > 0) {
|
|
||||||
per_field_files <- c(per_field_files, files)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(per_field_files) > 0) {
|
|
||||||
message(paste(" ✓ Using per-field mosaic approach"))
|
|
||||||
message(paste(" Found", length(per_field_files), "per-field mosaics"))
|
|
||||||
mosaic_mode <- "per-field"
|
|
||||||
mosaic_dir <- weekly_mosaic # Will be field subdirectories
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# PRIORITY 3: Fall back to single-file mosaic (legacy approach)
|
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
||||||
if (is.na(mosaic_mode)) {
|
|
||||||
message(" No per-field mosaics found. Checking for single-file mosaic (legacy approach)...")
|
mosaic_mode <- "per-field"
|
||||||
mosaic_dir <- weekly_mosaic
|
mosaic_dir <- weekly_mosaic
|
||||||
single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(single_file) > 0) {
|
|
||||||
message(paste(" ✓ Using single-file approach"))
|
|
||||||
message(paste(" Found 1 mosaic file:", basename(single_file[1])))
|
|
||||||
mosaic_mode <- "single-file"
|
|
||||||
} else {
|
|
||||||
stop(paste("ERROR: No mosaic files found for week", current_week, year,
|
|
||||||
"\n Checked (1) tile-based:", file.path(weekly_tile_max, "*", "week_*.tif"),
|
|
||||||
"\n Checked (2) per-field:", file.path(weekly_mosaic, "*", "week_*.tif"),
|
|
||||||
"\n Checked (3) single-file:", file.path(weekly_mosaic, "week_*.tif")))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
message(paste(" Using mosaic mode:", mosaic_mode))
|
|
||||||
|
|
||||||
# Load field boundaries
|
# Load field boundaries
|
||||||
tryCatch({
|
tryCatch({
|
||||||
|
|
@ -551,44 +516,15 @@ main <- function() {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
# Build per-field configuration
|
||||||
# ============================================================================
|
|
||||||
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Build tile grid (needed by calculate_field_statistics)
|
|
||||||
message("\nPreparing mosaic configuration for statistics calculation...")
|
message("\nPreparing mosaic configuration for statistics calculation...")
|
||||||
|
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
|
||||||
|
|
||||||
# For tile-based mosaics: build the grid mapping
|
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
|
||||||
# For single-file: create a minimal grid structure (single "tile" = entire mosaic)
|
field_grid <- list(
|
||||||
if (mosaic_mode == "tiled") {
|
mosaic_dir = mosaic_dir,
|
||||||
tile_grid <- build_tile_grid(mosaic_dir, current_week, year)
|
mode = "per-field"
|
||||||
message(paste(" ✓ Built tile grid with", nrow(tile_grid), "tiles"))
|
)
|
||||||
} else {
|
|
||||||
# Single-file mode: create a minimal grid with just the single mosaic
|
|
||||||
message(" ✓ Using single-file mosaic (no tile grid needed)")
|
|
||||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
|
|
||||||
single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(single_file) == 0) {
|
|
||||||
stop("ERROR: Single-file mosaic not found in", mosaic_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create a minimal tile_grid structure with one "tile" representing the entire mosaic
|
|
||||||
tile_grid <- list(
|
|
||||||
mosaic_dir = mosaic_dir,
|
|
||||||
data = data.frame(
|
|
||||||
id = 0, # Single tile ID = 0 (full extent)
|
|
||||||
xmin = NA_real_,
|
|
||||||
xmax = NA_real_,
|
|
||||||
ymin = NA_real_,
|
|
||||||
ymax = NA_real_,
|
|
||||||
stringsAsFactors = FALSE
|
|
||||||
),
|
|
||||||
mode = "single-file",
|
|
||||||
file = single_file[1]
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
message("\nUsing modular RDS-based approach for weekly statistics...")
|
message("\nUsing modular RDS-based approach for weekly statistics...")
|
||||||
|
|
||||||
|
|
@ -599,7 +535,7 @@ main <- function() {
|
||||||
year = year,
|
year = year,
|
||||||
project_dir = project_dir,
|
project_dir = project_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
mosaic_dir = tile_grid$mosaic_dir,
|
mosaic_dir = field_grid$mosaic_dir,
|
||||||
reports_dir = reports_dir,
|
reports_dir = reports_dir,
|
||||||
report_date = end_date
|
report_date = end_date
|
||||||
)
|
)
|
||||||
|
|
@ -617,7 +553,7 @@ main <- function() {
|
||||||
year = previous_year,
|
year = previous_year,
|
||||||
project_dir = project_dir,
|
project_dir = project_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
mosaic_dir = tile_grid$mosaic_dir,
|
mosaic_dir = field_grid$mosaic_dir,
|
||||||
reports_dir = reports_dir,
|
reports_dir = reports_dir,
|
||||||
report_date = prev_report_date
|
report_date = prev_report_date
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -660,85 +660,101 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
|
||||||
|
|
||||||
message(paste("Calculating statistics for all fields - Week", week_num, year))
|
message(paste("Calculating statistics for all fields - Week", week_num, year))
|
||||||
|
|
||||||
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
|
# Per-field mode: look in per-field subdirectories
|
||||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)
|
||||||
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(tile_files) == 0) {
|
# Find all field subdirectories with mosaics for this week
|
||||||
single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
|
||||||
if (length(single_file) > 0) {
|
field_dirs <- field_dirs[field_dirs != ""]
|
||||||
message(paste(" Using single-file mosaic for week", week_num))
|
|
||||||
tile_files <- single_file[1]
|
per_field_files <- list()
|
||||||
} else {
|
for (field in field_dirs) {
|
||||||
stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir))
|
field_mosaic_dir <- file.path(mosaic_dir, field)
|
||||||
|
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
||||||
|
if (length(files) > 0) {
|
||||||
|
per_field_files[[field]] <- files[1] # Take first match for this field
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
message(paste(" Found", length(tile_files), "mosaic file(s) for week", week_num))
|
if (length(per_field_files) == 0) {
|
||||||
|
stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
|
||||||
results_list <- list()
|
results_list <- list()
|
||||||
|
|
||||||
for (tile_idx in seq_along(tile_files)) {
|
# Process each field's mosaic
|
||||||
tile_file <- tile_files[tile_idx]
|
for (field_idx in seq_along(per_field_files)) {
|
||||||
|
field_name <- names(per_field_files)[field_idx]
|
||||||
|
field_file <- per_field_files[[field_name]]
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
current_rast <- terra::rast(tile_file)
|
current_rast <- terra::rast(field_file)
|
||||||
ci_band <- current_rast[["CI"]]
|
ci_band <- current_rast[["CI"]]
|
||||||
|
|
||||||
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
|
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
|
||||||
message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found"))
|
message(paste(" [SKIP] Field", field_name, "- CI band not found"))
|
||||||
return(NULL)
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE)
|
# Extract CI values for this field
|
||||||
unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)])
|
field_boundary <- field_boundaries_sf[field_boundaries_sf$field == field_name, ]
|
||||||
|
|
||||||
for (field_poly_idx in unique_field_ids) {
|
if (nrow(field_boundary) == 0) {
|
||||||
field_id <- field_boundaries_sf$field[field_poly_idx]
|
message(paste(" [SKIP] Field", field_name, "- not in field boundaries"))
|
||||||
ci_vals <- extracted$CI[extracted$ID == field_poly_idx]
|
next
|
||||||
ci_vals <- ci_vals[!is.na(ci_vals)]
|
|
||||||
|
|
||||||
if (length(ci_vals) == 0) next
|
|
||||||
|
|
||||||
mean_ci <- mean(ci_vals, na.rm = TRUE)
|
|
||||||
ci_std <- sd(ci_vals, na.rm = TRUE)
|
|
||||||
cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
|
|
||||||
range_min <- min(ci_vals, na.rm = TRUE)
|
|
||||||
range_max <- max(ci_vals, na.rm = TRUE)
|
|
||||||
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
|
|
||||||
ci_percentiles_str <- get_ci_percentiles(ci_vals)
|
|
||||||
|
|
||||||
GERMINATION_CI_THRESHOLD <- 2.0
|
|
||||||
num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE)
|
|
||||||
num_pixels_total <- length(ci_vals)
|
|
||||||
pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
|
|
||||||
|
|
||||||
field_rows <- extracted[extracted$ID == field_poly_idx, ]
|
|
||||||
num_total <- nrow(field_rows)
|
|
||||||
num_data <- sum(!is.na(field_rows$CI))
|
|
||||||
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
|
|
||||||
cloud_cat <- if (num_data == 0) "No image available"
|
|
||||||
else if (pct_clear >= 95) "Clear view"
|
|
||||||
else "Partial coverage"
|
|
||||||
|
|
||||||
existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
|
|
||||||
if (length(existing_idx) > 0) next
|
|
||||||
|
|
||||||
results_list[[length(results_list) + 1]] <- data.frame(
|
|
||||||
Field_id = field_id,
|
|
||||||
Mean_CI = round(mean_ci, 2),
|
|
||||||
CV = round(cv * 100, 2),
|
|
||||||
CI_range = range_str,
|
|
||||||
CI_Percentiles = ci_percentiles_str,
|
|
||||||
Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
|
|
||||||
Cloud_pct_clear = pct_clear,
|
|
||||||
Cloud_category = cloud_cat,
|
|
||||||
stringsAsFactors = FALSE
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message(paste(" Tile", tile_idx, "of", length(tile_files), "processed"))
|
extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE)
|
||||||
|
|
||||||
|
if (nrow(extracted) == 0 || all(is.na(extracted$CI))) {
|
||||||
|
message(paste(" [SKIP] Field", field_name, "- no CI values found"))
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
ci_vals <- extracted$CI[!is.na(extracted$CI)]
|
||||||
|
|
||||||
|
if (length(ci_vals) == 0) {
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate statistics
|
||||||
|
mean_ci <- mean(ci_vals, na.rm = TRUE)
|
||||||
|
ci_std <- sd(ci_vals, na.rm = TRUE)
|
||||||
|
cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
|
||||||
|
range_min <- min(ci_vals, na.rm = TRUE)
|
||||||
|
range_max <- max(ci_vals, na.rm = TRUE)
|
||||||
|
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
|
||||||
|
ci_percentiles_str <- get_ci_percentiles(ci_vals)
|
||||||
|
|
||||||
|
num_pixels_total <- length(ci_vals)
|
||||||
|
num_pixels_gte_2 <- sum(ci_vals >= 2)
|
||||||
|
pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
|
||||||
|
|
||||||
|
num_total <- nrow(extracted)
|
||||||
|
num_data <- sum(!is.na(extracted$CI))
|
||||||
|
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
|
||||||
|
cloud_cat <- if (num_data == 0) "No image available"
|
||||||
|
else if (pct_clear >= 95) "Clear view"
|
||||||
|
else "Partial coverage"
|
||||||
|
|
||||||
|
# Add to results
|
||||||
|
results_list[[length(results_list) + 1]] <- data.frame(
|
||||||
|
Field_id = field_name,
|
||||||
|
Mean_CI = round(mean_ci, 2),
|
||||||
|
CV = round(cv * 100, 2),
|
||||||
|
CI_range = range_str,
|
||||||
|
CI_Percentiles = ci_percentiles_str,
|
||||||
|
Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
|
||||||
|
Cloud_pct_clear = pct_clear,
|
||||||
|
Cloud_category = cloud_cat,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
message(paste(" Field", field_idx, "of", length(per_field_files), "processed"))
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message))
|
message(paste(" [ERROR] Field", field_name, ":", e$message))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: CI_report.docx
|
output_file: CI_report.docx
|
||||||
report_date: "2025-09-30"
|
report_date: "2025-09-30"
|
||||||
data_dir: "aura"
|
data_dir: "angata"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
||||||
|
|
@ -107,8 +107,9 @@ project_dir <- params$data_dir
|
||||||
# Source project parameters with error handling
|
# Source project parameters with error handling
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here::here("r_app", "parameters_project.R"))
|
source(here::here("r_app", "parameters_project.R"))
|
||||||
|
source(here::here("r_app", "00_common_utils.R"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading parameters_project.R: ", e$message)
|
stop("Error loading project utilities: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Load centralized paths
|
# Load centralized paths
|
||||||
|
|
@ -363,7 +364,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
|
||||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||||
# Load CI quadrant data for field-level analysis
|
# Load CI quadrant data for field-level analysis
|
||||||
tryCatch({
|
tryCatch({
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||||
safe_log("Successfully loaded CI quadrant data")
|
safe_log("Successfully loaded CI quadrant data")
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("Error loading CI quadrant data: ", e$message)
|
stop("Error loading CI quadrant data: ", e$message)
|
||||||
|
|
@ -840,7 +841,7 @@ The following table provides a comprehensive overview of all monitored fields wi
|
||||||
|
|
||||||
```{r detailed_field_table, echo=FALSE, results='asis'}
|
```{r detailed_field_table, echo=FALSE, results='asis'}
|
||||||
# Load CI quadrant data to get field ages
|
# Load CI quadrant data to get field ages
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||||
|
|
||||||
# Identify the current season for each field based on report_date
|
# Identify the current season for each field based on report_date
|
||||||
# The current season is the one where the report_date falls within or shortly after the season
|
# The current season is the one where the report_date falls within or shortly after the season
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: CI_report.docx
|
output_file: CI_report.docx
|
||||||
report_date: "2025-09-30"
|
report_date: "2025-09-30"
|
||||||
data_dir: "aura"
|
data_dir: "angata"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
ci_plot_type: "both" # options: "absolute", "cumulative", "both"
|
||||||
|
|
@ -110,6 +110,13 @@ tryCatch({
|
||||||
stop("Error loading parameters_project.R: ", e$message)
|
stop("Error loading parameters_project.R: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Source common utilities for logging and helper functions
|
||||||
|
tryCatch({
|
||||||
|
source(here::here("r_app", "00_common_utils.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("Error loading 00_common_utils.R: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
# Load centralized paths
|
# Load centralized paths
|
||||||
paths <- setup_project_directories(project_dir)
|
paths <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
|
|
@ -480,7 +487,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
|
||||||
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||||
# Load CI index data with error handling
|
# Load CI index data with error handling
|
||||||
tryCatch({
|
tryCatch({
|
||||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||||
|
|
||||||
safe_log("Successfully loaded CI quadrant data")
|
safe_log("Successfully loaded CI quadrant data")
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
|
|
|
||||||
|
|
@ -301,6 +301,9 @@ load_field_boundaries <- function(data_dir) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE)
|
boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE)
|
||||||
|
|
||||||
|
# Filter out features with empty geometries
|
||||||
|
boundaries_sf <- boundaries_sf[!st_is_empty(boundaries_sf), ]
|
||||||
|
|
||||||
# Repair geometries if needed
|
# Repair geometries if needed
|
||||||
if (!all(sf::st_is_valid(boundaries_sf))) {
|
if (!all(sf::st_is_valid(boundaries_sf))) {
|
||||||
boundaries_sf <- sf::st_make_valid(boundaries_sf)
|
boundaries_sf <- sf::st_make_valid(boundaries_sf)
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue