shaving off some more details...

This commit is contained in:
Timon 2026-02-09 10:40:01 +01:00
parent e16677eb78
commit 3ee3f9e31c
17 changed files with 1495 additions and 2223 deletions

613
MANUAL_PIPELINE_RUNNER.R Normal file
View file

@ -0,0 +1,613 @@
# ==============================================================================
# SMARTCANE MANUAL PIPELINE RUNNER
# ==============================================================================
#
# This file documents all pipeline steps as MANUAL COPY-PASTE COMMANDS.
# Do NOT run this script directly - instead, copy individual commands and
# paste them into your PowerShell terminal.
#
# This approach allows you to:
# - Run steps one at a time and inspect outputs
# - Re-run failed steps without re-running successful ones
# - Monitor progress between steps
# - Troubleshoot issues more easily than with automated pipeline
#
# ==============================================================================
# PIPELINE SEQUENCE (IN ORDER)
# ==============================================================================
#
# 1. Python: Download Planet satellite imagery (optional - only if new data needed)
# 2. R10: Split farm TIFFs into per-field directory structure
# 3. R20: Extract Chlorophyll Index (CI) from 4-band imagery
# 4. R30: Interpolate growth model (smooth CI time series)
# 5. R21: Convert CI data to CSV format for Python
# 6. Python31: Harvest imminent predictions (optional - requires harvest.xlsx)
# 7. R40: Create weekly mosaic TIFFs
# 8. R80: Calculate KPIs (field uniformity, trends, stress)
# 9. R90/91: Generate Word reports (optional - Agronomic or Cane Supply)
#
# ==============================================================================
# BEFORE YOU START
# ==============================================================================
#
# 1. Open PowerShell in the smartcane root directory:
# C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\
#
# 2. Define your parameters ONCE at the top of the session:
#
# $PROJECT = "angata" # Project: angata, chemba, xinavane, esa, simba
# $END_DATE = "2026-02-04" # YYYY-MM-DD format (e.g., 2026-02-04)
# $OFFSET = 7 # Days to look back (e.g., 7 for one week)
# $WEEK = 6 # ISO week number (1-53) - auto-calculated from END_DATE
# $YEAR = 2026 # ISO year - auto-calculated from END_DATE
#
# 3. Use these variables in the commands below by replacing [PROJECT], [END_DATE], etc.
#
# ==============================================================================
# COMMAND REFERENCE
# ==============================================================================
# ==============================================================================
# STEP 0: PYTHON - Download Planet Satellite Imagery (OPTIONAL)
# ==============================================================================
#
# PURPOSE:
# Download 4-band (RGB+NIR) satellite imagery from Planet Labs API
# Downloads to: laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif
#
# WHEN TO RUN:
# - Only needed if you have new dates to process
# - Pipeline skips dates already in merged_tif/ or field_tiles/
# - First-time setup: download for your date range
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# DATE: YYYY-MM-DD format (e.g., 2026-02-04)
# RESOLUTION: 3 meters (default) - can also use 5, 10
# --cleanup: Delete intermediate files after download
# --clear-all: Clear all output folders before downloading
#
# COMMAND #1 - Single Date Download:
#
# cd python_app
# python 00_download_8band_pu_optimized.py [PROJECT] --date [DATE] --resolution 3 --cleanup
#
# Example:
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
#
# COMMAND #2 - Batch Download (Multiple Dates):
#
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
#
# Example:
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
#
# EXPECTED OUTPUT:
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
#
# Note: Planet API requires authentication (PLANET_API_KEY environment variable)
# Cost: ~1,500-2,000 PU per date
#
# ============================================================================
# ==============================================================================
# STEP 1: R10 - Create Per-Field TIFF Structure
# ==============================================================================
#
# PURPOSE:
# Split farm-wide GeoTIFFs into per-field directory structure.
# Transforms: merged_tif/{DATE}.tif (single file)
# → field_tiles/{FIELD_ID}/{DATE}.tif (per-field files)
# This enables clean, scalable processing in downstream scripts.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/merged_tif/{DATE}.tif (4-band RGB+NIR)
# - Field boundaries: laravel_app/storage/app/{PROJECT}/pivot.geojson
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif
# - One TIFF per field per date (1185 fields × N dates in Angata)
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
#
# EXPECTED OUTPUT:
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
# Storage location: laravel_app/storage/app/angata/field_tiles/
# Script execution time: 5-10 minutes (depends on number of dates)
#
# ============================================================================
# ==============================================================================
# STEP 2: R20 - Extract Chlorophyll Index (CI)
# ==============================================================================
#
# PURPOSE:
# Calculate Chlorophyll Index from 4-band imagery and create 5-band output TIFFs.
# Also extracts CI statistics per sub_field for daily tracking.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD_ID}/{DATE}.tif (4-band)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif (5-band with CI)
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD_ID}/{DATE}.rds
#
# EXPECTED BEHAVIOR:
# If field_tiles_CI/ or daily_vals/ missing files, Script 20 will process them
# Script 20 skips files that already exist (to avoid re-processing)
# ⚠️ IF NOT ALL FILES CREATED: See troubleshooting section below
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - date range end
# OFFSET: Days to look back (e.g., 7 for one week window)
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
#
# EXPECTED OUTPUT:
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
# Example: 1185 fields × 8 dates = 9,480 files in field_tiles_CI/
# Storage location: laravel_app/storage/app/angata/field_tiles_CI/
# Script execution time: 10-20 minutes (depends on number of dates+fields)
#
# NOTES:
# Script 20 processes dates between (END_DATE - OFFSET) and END_DATE
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
#
# TROUBLESHOOTING:
# ❌ If field_tiles_CI has fewer files than field_tiles:
# - Check if all field_tiles/{FIELD}/{DATE}.tif files exist
# - Script 20 may be skipping due to incomplete source files
# - Solution: Delete problematic files from field_tiles and re-run Script 10
#
# ============================================================================
# ==============================================================================
# STEP 3: R30 - Interpolate Growth Model
# ==============================================================================
#
# PURPOSE:
# Smooth CI time series using LOESS interpolation to fill gaps.
# Creates continuous growth curves for each field across all measurement dates.
# Enables trend analysis, yield prediction, and cumulative growth metrics.
#
# INPUT:
# - Daily CI statistics from Script 20 (Data/extracted_ci/daily_vals/ per-field RDS files)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
# - (This is the growth model output used by Script 21 and 80)
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R [PROJECT]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# EXPECTED OUTPUT:
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
# Contains: Interpolated CI data for all fields (wide format)
# Script execution time: 5-15 minutes
#
# ============================================================================
# ==============================================================================
# STEP 4: R21 - Convert CI RDS to CSV (Python Format)
# ==============================================================================
#
# PURPOSE:
# Convert growth model output from R's RDS format to Python-compatible CSV.
# Transforms from wide format (fields × dates) to long format (one row per field-date pair).
# Prepares data for Python harvest detection models.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
# (Output from Script 30)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
# - Columns: field, sub_field, Date, FitData, DOY, value
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R [PROJECT]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
#
# EXPECTED OUTPUT:
# File: ci_data_for_python.csv (~5-10 MB)
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
# Script execution time: 1-2 minutes
#
# ============================================================================
# ==============================================================================
# STEP 5: PYTHON31 - Harvest Imminent Predictions (OPTIONAL)
# ==============================================================================
#
# PURPOSE:
# Predict which fields are approaching harvest in the next 28 days.
# Uses neural network (Model 307) trained on historical harvest dates.
# Generates weekly probability scores for operational harvest scheduling.
#
# REQUIRES:
# - harvest.xlsx with field planting/harvest dates
# - ci_data_for_python.csv from Script 21
# - PyTorch environment (conda pytorch_gpu)
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/Data/harvest.xlsx
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
# - Columns: field, sub_field, imminent_prob, detected_prob, week, year, as_of_date, num_days
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py [PROJECT]
#
# Example:
# conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py angata
#
# EXPECTED OUTPUT:
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
# Rows: One per field (e.g., 1185 rows for Angata)
# Script execution time: 2-5 minutes
#
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
#
# ============================================================================
# ==============================================================================
# STEP 6: R40 - Create Weekly Mosaic TIFFs
# ==============================================================================
#
# PURPOSE:
# Aggregate daily per-field CI TIFFs into weekly mosaics.
# Handles multiple dates (full week) with maximum CI value per pixel.
# Creates 5-band output for reporting and KPI calculations.
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD_ID}/{DATE}.tif
# (Daily per-field CI TIFFs from Script 20)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_{WW}_{YYYY}.tif
# - One per field per week (e.g., 1185 fields × 1 week = 1,185 files)
#
# PARAMETERS:
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04) - determines ISO week
# OFFSET: Days to look back (e.g., 7 for one week window)
# PROJECT: angata, chemba, xinavane, esa, simba
#
# COMMAND:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R [END_DATE] [OFFSET] [PROJECT]
#
# Example (one week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
#
# Example (two week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 14 angata
#
# EXPECTED OUTPUT:
# Location: laravel_app/storage/app/angata/weekly_mosaic/
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
# Files created: #fields (e.g., 1185 for Angata)
# Storage: ~50-100 MB total for all mosaic TIFFs
# Script execution time: 5-10 minutes
#
# NOTE: Files are named with ISO week number (WW) and year (YYYY)
# Week calculation is automatic based on END_DATE
#
# ============================================================================
# ==============================================================================
# STEP 7: R80 - Calculate Key Performance Indicators (KPIs)
# ==============================================================================
#
# PURPOSE:
# Calculate per-field metrics from weekly mosaic TIFFs:
# - Field uniformity (CV - Coefficient of Variation)
# - Growth trends (4-week and 8-week)
# - Area change detection
# - TCH forecast
# - Spatial clustering (weed/stress detection)
# - Generates Excel export for dashboards and reporting
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD_ID}/week_*.tif
# - Field boundaries (pivot.geojson)
# - Harvest data (harvest.xlsx)
# - Historical stats cache (RDS from previous weeks)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds (cached stats)
# - 21 columns with field-level KPIs and alerts
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# WEEK: ISO week number (1-53, optional - default current week)
# YEAR: ISO year (optional - default current year)
#
# COMMAND #1 - Current Week (Auto-detects from TODAY):
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT]
#
# Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata
#
# COMMAND #2 - Specific Week & Year:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] [WEEK] [YEAR]
#
# Example (Week 5, Year 2026):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
#
# EXPECTED OUTPUT:
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# Rows: One per field (e.g., 1185 for Angata)
# Columns: 21 KPI columns (uniformity, trend, alerts, etc.)
# Location: laravel_app/storage/app/angata/output/
# Script execution time: 10-20 minutes
#
# EXPECTED COLUMNS:
# field, sub_field, phase, cv (uniformity), ci_mean, area_ha, area_ac,
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
#
# ============================================================================
# ==============================================================================
# STEP 8: R90/R91 - Generate Word Report (OPTIONAL)
# ==============================================================================
#
# PURPOSE:
# Generate formatted Word report (.docx) with:
# - KPI summary tables and charts
# - Per-field performance metrics
# - Alerts and recommendations
# - Interpretation guides
#
# Client-Specific Reports:
# - R90: Agronomic Support (for AURA project)
# - R91: Cane Supply (for ANGATA, CHEMBA, XINAVANE, ESA)
#
# INPUT:
# - laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# (from Script 80)
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*.docx
# - Formatted Word document (~5-10 MB)
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-04)
# REPORT_TYPE: agronomic or cane_supply (determines which Rmd file to render)
#
# COMMAND #1 - AGRONOMIC REPORT (AURA project):
# From R console or R script:
#
# rmarkdown::render(
# "r_app/90_CI_report_with_kpis_simple.Rmd",
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
# output_dir = "laravel_app/storage/app/angata/reports"
# )
#
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
# From R console or R script:
#
# rmarkdown::render(
# "r_app/91_CI_report_with_kpis_Angata.Rmd",
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
# output_dir = "laravel_app/storage/app/angata/reports"
# )
#
# EXPECTED OUTPUT:
# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
# Location: laravel_app/storage/app/{PROJECT}/reports/
# Script execution time: 5-10 minutes
#
# NOTE:
# These are R Markdown files and cannot be run directly via Rscript
# Use rmarkdown::render() from an R interactive session or wrapper script
# See run_full_pipeline.R for an automated example
#
# ============================================================================
# ==============================================================================
# QUICK REFERENCE: Common Workflows
# ==============================================================================
#
# WORKFLOW A: Weekly Update (Most Common)
# ─────────────────────────────────────────────────────────────────────────
# Goal: Process latest week of data through full pipeline
#
# Parameters:
# $PROJECT = "angata"
# $END_DATE = "2026-02-04" # Today or latest date available
# $OFFSET = 7 # One week back
#
# Steps:
# 1. SKIP Python download (if you already have data)
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
# OR from R console:
# rmarkdown::render("r_app/91_CI_report_with_kpis_Angata.Rmd",
# params=list(data_dir="angata", report_date=as.Date("2026-02-04")),
# output_file="SmartCane_Report_cane_supply_angata_2026-02-04.docx",
# output_dir="laravel_app/storage/app/angata/reports")
#
# Execution time: ~60-90 minutes total
#
#
# WORKFLOW B: Initial Setup (Large Backfill)
# ─────────────────────────────────────────────────────────────────────────
# Goal: Process multiple weeks of historical data
#
# Steps:
# 1. Python download (your entire date range)
# 2. Run R10 once (processes all dates)
# 3. Run R20 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
# 4. Run R30 once (growth model full season)
# 5. Run R21 once (CSV export)
# 6. Run R40 with specific week windows as needed
# 7. Run R80 for each week you want KPIs for
#    In practice, for each week, run:
#      - R40 with different END_DATE values (one per week)
#      - R80 with different WEEK/YEAR values (one per week)
#      - R91 optional (one per week report)
#
# Pro tip: Script R40 with offset=14 covers two weeks at once
# Then R40 again with offset=7 for just one week
#
#
# WORKFLOW C: Troubleshooting (Check Intermediate Outputs)
# ─────────────────────────────────────────────────────────────────────────
# Goal: Verify outputs before moving to next step
#
# After R10: Check field_tiles/{FIELD_ID}/ has #dates files
# After R20: Check field_tiles_CI/{FIELD_ID}/ has same #dates files
# After R30: Check Data/extracted_ci/cumulative_vals/ has All_pivots_*.rds
# After R40: Check weekly_mosaic/{FIELD_ID}/ has week_WW_YYYY.tif per week
# After R80: Check output/ has {PROJECT}_field_analysis_week*.xlsx
#
# ============================================================================
# ==============================================================================
# TROUBLESHOOTING
# ==============================================================================
#
# ISSUE: R20 not processing all field_tiles files
# ────────────────────────────────────────────────
# Symptom: field_tiles has 496 files, field_tiles_CI only has 5
#
# Possible causes:
# 1. Source files incomplete or corrupted
# 2. Script 20 skips because CI TIFF already exists (even if incomplete)
# 3. Partial run from previous attempt
#
# Solutions:
# 1. Delete the small number of files in field_tiles_CI/{FIELD}/ (don't delete all!)
# rm laravel_app/storage/app/angata/field_tiles_CI/{fieldnum}/*
# 2. Re-run Script 20
# 3. If still fails, delete field_tiles_CI completely and re-run Script 20
# rm -r laravel_app/storage/app/angata/field_tiles_CI/
#
# ISSUE: Script 80 says "No per-field mosaic files found"
# ────────────────────────────────────────────────────────
# Symptom: R80 fails to calculate KPIs
#
# Possible causes:
# 1. Script 40 hasn't run yet (weekly_mosaic doesn't exist)
# 2. Wrong END_DATE or WEEK/YEAR combination
# 3. weekly_mosaic/{FIELD}/ directory missing (old format?)
#
# Solutions:
# 1. Ensure Script 40 has completed: Check weekly_mosaic/{FIELD}/ exists with week_WW_YYYY.tif
# 2. Verify END_DATE is within date range of available CI data
# 3. For current week: End date must be THIS week (same ISO week as today)
#
# ISSUE: Python download fails ("Not authorized")
# ────────────────────────────────────────────────
# Symptom: python 00_download_8band_pu_optimized.py fails with authentication error
#
# Cause: PLANET_API_KEY environment variable not set
#
# Solution:
# 1. Save your Planet API key: $env:PLANET_API_KEY = "your_key_here"
# 2. Verify: $env:PLANET_API_KEY (should show your key)
# 3. Try download again
#
# ISSUE: R30 takes too long
# ──────────────────────────
# Symptom: Script 30 running for >30 minutes
#
# Cause: LOESS interpolation is slow with many dates/fields
#
# Solution:
# 1. This is normal - large date ranges slow down interpolation
# 2. Subsequent runs are faster (cached results)
# 3. If needed: reduce offset or run fewer weeks at a time
#
# ==============================================================================
# ==============================================================================
# SUMMARY OF FILES CREATED BY EACH SCRIPT
# ==============================================================================
#
# Script 10 creates:
# laravel_app/storage/app/{PROJECT}/field_tiles/{FIELD}/{DATE}.tif
#
# Script 20 creates:
# laravel_app/storage/app/{PROJECT}/field_tiles_CI/{FIELD}/{DATE}.tif
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds
#
# Script 30 creates:
# laravel_app/storage/app/{PROJECT}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
#
# Script 21 creates:
# laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
#
# Python 31 creates:
# laravel_app/storage/app/{PROJECT}/reports/kpis/field_stats/{PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
#
# Script 40 creates:
# laravel_app/storage/app/{PROJECT}/weekly_mosaic/{FIELD}/week_{WW}_{YYYY}.tif
#
# Script 80 creates:
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
# laravel_app/storage/app/{PROJECT}/output/{PROJECT}_field_analysis_week{WW}_{YYYY}.rds
#
# Script 90/91 creates:
# laravel_app/storage/app/{PROJECT}/reports/SmartCane_Report_*_{PROJECT}_{DATE}.docx
#
# ==============================================================================

View file

@ -1,499 +0,0 @@
#' Combined: Create master grid and split TIFFs into tiles
#' ====================================================================
#'
#' Purpose:
#' 1. Check all daily TIFFs for matching extents
#' 2. Create master 5×5 grid covering all TIFFs
#' 3. Split each daily TIFF into 25 tiles using the master grid
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18
library(terra)
library(sf)
# ============================================================================
# CONFIGURATION & COMMAND-LINE ARGUMENTS
# ============================================================================
# Parse command-line arguments for date filtering
args <- commandArgs(trailingOnly = TRUE)
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
start_date <- NULL
end_date <- NULL
if (length(args) >= 1) {
start_date <- as.Date(args[1])
cat("Filtering: start date =", as.character(start_date), "\n")
}
if (length(args) >= 2) {
end_date <- as.Date(args[2])
cat("Filtering: end date =", as.character(end_date), "\n")
}
PROJECT <- "angata"
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
GRID_NROWS <- 5
GRID_NCOLS <- 5
# Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
# Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
# ============================================================================
cat("\n[PART 1] Creating Master Grid\n")
# Load field boundaries for overlap checking
cat("\n[1] Checking for existing master grid...\n")
# Check if master grid already exists
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
if (file.exists(MASTER_GRID_PATH)) {
cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
field_boundaries_sf <- NULL # No need to load pivot.geojson
field_boundaries_vect <- NULL
cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
# No existing grid - need to create one from pivot.geojson
cat(" No existing grid found. Creating new one from pivot.geojson...\n")
if (!file.exists(GEOJSON_PATH)) {
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
"Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
"from the same directory as a previous successful run (grid already exists).")
}
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
}
# Try to find a name column (only if field_boundaries_sf exists)
if (!is.null(field_boundaries_sf)) {
field_names <- NA
if ("name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$name
} else if ("field" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field
} else if ("field_name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field_name
} else {
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
}
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
}
# Helper function: Check if a tile overlaps with any field (simple bbox overlap)
tile_overlaps_fields <- function(tile_extent, field_geoms) {
tryCatch({
# Simple bounding box overlap test - no complex geometry operations
# Two boxes overlap if: NOT (box1.xmax < box2.xmin OR box1.xmin > box2.xmax OR
# box1.ymax < box2.ymin OR box1.ymin > box2.ymax)
# For each field geometry, check if it overlaps with tile bbox
for (i in seq_len(length(field_geoms))) {
# Skip empty geometries
if (st_is_empty(field_geoms[i])) {
next
}
# Get field bbox
field_bbox <- st_bbox(field_geoms[i])
# Check bbox overlap (simple coordinate comparison)
x_overlap <- !(tile_extent$xmax < field_bbox$xmin || tile_extent$xmin > field_bbox$xmax)
y_overlap <- !(tile_extent$ymax < field_bbox$ymin || tile_extent$ymin > field_bbox$ymax)
if (x_overlap && y_overlap) {
return(TRUE) # Found overlap!
}
}
return(FALSE) # No overlap found
}, error = function(e) {
cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
return(TRUE) # Default to including tile if there's an error
})
}
cat("\n[2] Checking TIFF extents...\n")
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)
# Filter by date range if specified
if (!is.null(start_date) || !is.null(end_date)) {
cat("\nApplying date filter...\n")
file_dates <- as.Date(sub("\\.tif$", "", tiff_files))
if (!is.null(start_date) && !is.null(end_date)) {
keep_idx <- file_dates >= start_date & file_dates <= end_date
cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
} else if (!is.null(start_date)) {
keep_idx <- file_dates >= start_date
cat(" From: ", as.character(start_date), "\n", sep = "")
} else {
keep_idx <- file_dates <= end_date
cat(" Until: ", as.character(end_date), "\n", sep = "")
}
tiff_files <- tiff_files[keep_idx]
cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
}
if (length(tiff_files) == 0) {
stop("No TIFF files found in ", TIFF_FOLDER)
}
cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")
# ----------------------------------------------------------------------------
# Load every TIFF's extent ONCE up-front, then compare each against the first
# file's extent within a small tolerance. A mismatch only triggers a warning;
# the master extent built below covers all files either way.
# ----------------------------------------------------------------------------
extents <- vector("list", length(tiff_files))  # preallocate instead of growing
for (i in seq_along(tiff_files)) {
  tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
  raster <- terra::rast(tiff_path)
  extents[[i]] <- terra::ext(raster)
  # Progress indicator every 50 files
  if (i %% 50 == 0) {
    cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
  }
}
cat(" ✓ All extents loaded\n")
# Check if all extents match
cat("\n[3] Comparing extents...\n")
tolerance <- 1e-8
all_match <- TRUE
first_ext <- extents[[1]]
# BUGFIX: the original `2:length(extents)` evaluates to c(2, 1) when only one
# TIFF exists, then indexes extents[[2]] out of bounds. seq_along(extents)[-1]
# is empty in that case, so a single file is trivially "all matching".
for (i in seq_along(extents)[-1]) {
  curr_ext <- extents[[i]]
  extents_equal <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
      abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
      abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
      abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )
  if (!extents_equal) {
    all_match <- FALSE
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}
if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}
# ----------------------------------------------------------------------------
# [4] Build one master extent that covers every input TIFF, then choose the
# grid dimensions from the ROI's physical size. Degrees are converted to
# metres with the ~111320 m-per-degree approximation (equatorial scale).
# ----------------------------------------------------------------------------
cat("\n[4] Creating master extent...\n")
master_xmin <- min(vapply(extents, function(e) e$xmin, numeric(1)))
master_xmax <- max(vapply(extents, function(e) e$xmax, numeric(1)))
master_ymin <- min(vapply(extents, function(e) e$ymin, numeric(1)))
master_ymax <- max(vapply(extents, function(e) e$ymax, numeric(1)))
x_range_m <- (master_xmax - master_xmin) * 111320
y_range_m <- (master_ymax - master_ymin) * 111320
cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
    "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")
# Auto-select the tiling scheme: small ROIs get a single tile, larger ones
# a 5×5 grid (25 tiles per date).
roi_is_small <- x_range_m < 10000 && y_range_m < 10000
if (roi_is_small) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
} else {
  cat("\n ROI size allows tiling. Using 5×5 grid (25 tiles per date).\n")
  GRID_NROWS <- 5
  GRID_NCOLS <- 5
}
N_TILES <- GRID_NROWS * GRID_NCOLS
# ----------------------------------------------------------------------------
# [5]/[6] Master grid: load from cache if present, otherwise build and save.
# The grid is a GRID_NCOLS×GRID_NROWS fishnet of polygons over the master
# bbox (EPSG:4326), persisted as GeoJSON so later runs reuse the same tiling.
# NOTE(review): GRID_SIZE_LABEL is assumed to be defined earlier in this
# script; when the cached file exists it is loaded regardless of the
# GRID_NROWS/GRID_NCOLS decision made above — confirm the label and the
# auto-selected dimensions can never disagree.
# ----------------------------------------------------------------------------
cat("\n[5] Checking if master grid exists...\n")
master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
if (file.exists(master_grid_file)) {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  # Keep both representations: sf for attribute work, SpatVector for terra.
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  cat(" Grid does not exist. Creating new master grid...\n")
  # Create the GRID_NCOLS×GRID_NROWS fishnet over the master extent
  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
  master_bbox <- st_bbox(c(
    xmin = master_xmin,
    xmax = master_xmax,
    ymin = master_ymin,
    ymax = master_ymax
  ), crs = 4326)
  bbox_sf <- st_as_sfc(master_bbox)
  master_grid <- st_make_grid(
    bbox_sf,
    n = c(GRID_NCOLS, GRID_NROWS),
    what = "polygons"
  )
  # Zero-padded two-digit ids ("01".."25") so tile filenames sort naturally
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", 1:length(master_grid)),
    geometry = master_grid
  )
  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")
  # Convert to SpatVector for use in makeTiles
  master_grid_vect <- terra::vect(master_grid_sf)
  # Save master grid (create the output folder on first run)
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}
# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
# If the grid was loaded from file, it is already filtered — skip filtering.
# NOTE(review): this branch tests MASTER_GRID_PATH while the load/create step
# above builds a local master_grid_file path — confirm both refer to the same
# file, otherwise filtering may run (or be skipped) unexpectedly.
if (!file.exists(MASTER_GRID_PATH)) {
  cat("\n[7] Filtering master grid to only overlapping tiles...\n")
  # Test every grid cell against the field boundaries in one vectorized pass
  # (replaces growing overlapping_tile_indices with c() inside a loop).
  tile_overlaps <- vapply(
    seq_len(nrow(master_grid_sf)),
    function(tile_idx) {
      tile_geom <- master_grid_sf[tile_idx, ]
      # isTRUE() guards against an NA result from the overlap helper
      isTRUE(tile_overlaps_fields(st_bbox(tile_geom$geometry),
                                  field_boundaries_sf$geometry))
    },
    logical(1)
  )
  overlapping_tile_indices <- which(tile_overlaps)
  cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
  cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
  # Create filtered grid with only overlapping tiles
  filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
  filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
} else {
  cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
  # Grid was already loaded - it's already filtered
  filtered_grid_sf <- master_grid_sf
}
# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)
cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================
# Each date is processed in isolation: a date whose output folder already
# contains tiles is skipped, so the loop can be interrupted and restarted.
cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")
total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0
for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  date_str <- gsub("\\.tif$", "", tiff_file)
  # Date-specific output folder path (created on disk further below)
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
  # CHECK: Skip if date already processed (RESUME-SAFE)
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]
    if (length(existing_tiles) > 0) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
          " (", length(existing_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next # Skip this date
    }
  }
  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1
  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)
  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }
  cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")
  # Use makeTiles with FILTERED grid (only overlapping tiles)
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )
  # Rename tiles to [DATE]_[TILE_ID].tif
  # BUGFIX: use the file paths actually returned by makeTiles() instead of
  # re-deriving "tile<i>.tif" by hand — the reconstruction silently produced
  # no renames whenever terra's naming scheme differed from that guess.
  for (tile_idx in seq_along(tiles_list)) {
    source_file <- tiles_list[tile_idx]
    tile_id <- filtered_grid_sf$tile_id[tile_idx]
    final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
    if (file.exists(source_file)) {
      file.rename(source_file, final_file)
    }
  }
  cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + length(tiles_list)
}
# ============================================================================
# VERIFICATION
# ============================================================================
# Count the tiles written under each per-date folder (master grid files are
# excluded from the count) and print one line per date.
cat("\n[9] Verifying output...\n")
date_folders <- sort(setdiff(
  list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE),
  "."
))
count_date_tiles <- function(folder_name) {
  found <- list.files(file.path(OUTPUT_FOLDER, folder_name),
                      pattern = "\\.tif$")
  length(found[!grepl("master_grid", found)])
}
tile_counts <- vapply(date_folders, count_date_tiles, integer(1))
for (idx in seq_along(date_folders)) {
  cat(" ", date_folders[idx], ": ", tile_counts[idx], " tiles\n", sep = "")
}
total_tile_files <- sum(tile_counts)
# ============================================================================
# SUMMARY
# ============================================================================
# Console-only recap of the run: grid configuration, filtering behaviour,
# processing counters and the expected on-disk layout. No side effects.
cat("\n\n========== SUMMARY ==========\n")
cat("\nGrid Configuration:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat("\nField Filtering:\n")
cat(" - Field boundaries loaded from pivot.geojson\n")
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
cat(" - Significant storage savings for sparse fields!\n")
cat("\nProcessing Summary:\n")
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
# Average is only meaningful when at least one new date was tiled this run
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}
cat("\nDirectory Structure:\n")
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat(" ├── 2024-01-15/\n")
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
cat(" │ ├── 2024-01-15_05.tif\n")
cat(" │ └── ...\n")
cat(" ├── 2024-01-16/\n")
cat(" │ └── ...\n")
cat(" └── ...\n")
cat("\n⭐ Key Benefits:\n")
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# This metadata file is read by parameters_project.R to determine mosaic mode
# It allows script 40 to know what script 10 decided without re-computing
# NOTE(review): the JSON is assembled by hand; all inserted values are
# script-controlled numbers/identifiers, so no string escaping is performed —
# revisit if free-text fields are ever added.
cat("\n[10] Writing tiling configuration metadata...\n")
config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")
# tolower() turns the logical TRUE/FALSE into JSON's lowercase true/false
config_json <- paste0(
  '{\n',
  ' "project": "', PROJECT, '",\n',
  ' "has_tiles": ', tolower(N_TILES > 1), ',\n',
  ' "grid_size": "', GRID_SIZE_LABEL, '",\n',
  ' "grid_rows": ', GRID_NROWS, ',\n',
  ' "grid_cols": ', GRID_NCOLS, ',\n',
  ' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n',
  ' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n',
  ' "created_date": "', Sys.Date(), '",\n',
  ' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n',
  '}\n'
)
writeLines(config_json, config_file)
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")
cat("\n✓ Script complete!\n")

View file

@ -68,7 +68,14 @@ main <- function() {
setwd("..") setwd("..")
} }
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations) # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) == 0) "angata" else args[1]
# Make project_dir available to sourced files (they execute in global scope)
assign("project_dir", project_dir, envir = .GlobalEnv)
# STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.) # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source("r_app/parameters_project.R")
@ -85,12 +92,31 @@ main <- function() {
stop(e) stop(e)
}) })
# STEP 3: Parse command-line arguments # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign())
args <- commandArgs(trailingOnly = TRUE) # These control which dates Script 10 processes from merged_tif/
project_dir <- if (length(args) == 0) "angata" else args[1] # Window: end_date - offset days to end_date
# Always coerce to correct types to avoid issues with lingering/inherited values
if (!exists("end_date") || !inherits(end_date, "Date")) {
end_date <- as.Date("2026-02-04")
safe_log(paste("Using default end_date:", end_date), "INFO")
}
if (!exists("offset") || !is.numeric(offset)) {
offset <- 7
safe_log(paste("Using default offset:", offset, "days"), "INFO")
}
# STEP 4: Now all utilities are loaded, proceed with script logic # Ensure offset is numeric (in case it came in as a character string from environment)
# Load centralized path structure (creates all directories automatically) if (is.character(offset)) {
offset <- as.numeric(offset)
}
# Calculate date window for processing
start_date <- end_date - offset
date_window <- seq(start_date, end_date, by = "day")
date_window_str <- format(date_window, "%Y-%m-%d")
safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")
# STEP 5: Load centralized path structure (creates all directories automatically)
paths <- setup_project_directories(project_dir) paths <- setup_project_directories(project_dir)
safe_log(paste("Project:", project_dir)) safe_log(paste("Project:", project_dir))
@ -109,7 +135,9 @@ main <- function() {
# PHASE 1: Process new downloads (always runs) # PHASE 1: Process new downloads (always runs)
# Pass field_tiles_ci_dir so it can skip dates already migrated # Pass field_tiles_ci_dir so it can skip dates already migrated
process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir) # Also pass end_date and offset so only dates in window are processed
process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
end_date = end_date, offset = offset)
safe_log("\n========================================", "INFO") safe_log("\n========================================", "INFO")
safe_log("FINAL SUMMARY", "INFO") safe_log("FINAL SUMMARY", "INFO")

View file

@ -156,6 +156,11 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
#' TIFFs are stored. If provided, skips dates #' TIFFs are stored. If provided, skips dates
#' already processed and moved to field_tiles_CI/. #' already processed and moved to field_tiles_CI/.
#' Default: NULL (process all dates). #' Default: NULL (process all dates).
#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD).
#' Default: NULL (process all available dates).
#' @param offset Integer. Optional. Number of days to look back from end_date.
#' Only used if end_date is also provided.
#' Default: NULL (process all available dates).
#' #'
#' @return List with elements: #' @return List with elements:
#' - total_created: Integer. Total field TIFFs created across all dates #' - total_created: Integer. Total field TIFFs created across all dates
@ -187,7 +192,8 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
#' result$total_created, result$total_skipped, result$total_errors)) #' result$total_created, result$total_skipped, result$total_errors))
#' } #' }
#' #'
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) { process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL,
end_date = NULL, offset = NULL) {
safe_log("\n========================================", "INFO") safe_log("\n========================================", "INFO")
safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO") safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
@ -211,6 +217,19 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel
full.names = TRUE full.names = TRUE
) )
# FILTER by date window if end_date and offset provided
if (!is.null(end_date) && !is.null(offset)) {
start_date <- end_date - offset
date_range <- seq(start_date, end_date, by = "day")
date_range_str <- format(date_range, "%Y-%m-%d")
# Extract dates from filenames and filter
tiff_dates <- gsub("\\.tif$", "", basename(tiff_files))
tiff_files <- tiff_files[tiff_dates %in% date_range_str]
safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO")
}
safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO") safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")
if (length(tiff_files) == 0) { if (length(tiff_files) == 0) {
@ -226,7 +245,7 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel
for (tif_path in tiff_files) { for (tif_path in tiff_files) {
tif_date <- gsub("\\.tif$", "", basename(tif_path)) tif_date <- gsub("\\.tif$", "", basename(tif_path))
# MIGRATION MODE CHECK: Skip if this date was already migrated to field_tiles_CI/ # CHECK 1: Skip if this date was already migrated to field_tiles_CI/
# (This means Script 20 already processed it and extracted RDS) # (This means Script 20 already processed it and extracted RDS)
if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) { if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) {
# Check if ANY field has this date in field_tiles_CI/ # Check if ANY field has this date in field_tiles_CI/
@ -249,6 +268,28 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, fiel
} }
} }
# CHECK 2: Skip if this date already exists in field_tiles/
# (means this date has already been processed through Script 10)
if (dir.exists(field_tiles_dir)) {
date_exists_in_field_tiles <- FALSE
# Check if ANY field directory has this date
field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
for (field_dir in field_dirs) {
potential_file <- file.path(field_dir, paste0(tif_date, ".tif"))
if (file.exists(potential_file)) {
date_exists_in_field_tiles <- TRUE
break
}
}
if (date_exists_in_field_tiles) {
safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO")
total_skipped <- total_skipped + 1
next
}
}
safe_log(paste("Processing:", tif_date), "INFO") safe_log(paste("Processing:", tif_date), "INFO")
result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)

View file

@ -1,366 +0,0 @@
# ============================================================================
# SCRIPT 20: Canopy Index (CI) Extraction from Satellite Imagery
# ============================================================================
# PURPOSE:
# Extract Canopy Index (CI) from 4-band or 8-band satellite imagery and
# mask by field boundaries. Supports automatic band detection, cloud masking
# with UDM2 (8-band), and per-field CI value extraction. Produces both
# per-field TIFFs and consolidated CI statistics for growth model input.
#
# INPUT DATA:
# - Source: laravel_app/storage/app/{project}/field_tiles/{FIELD}/{DATE}.tif
# - Format: GeoTIFF (4-band RGB+NIR from Planet API, or 8-band with UDM2)
# - Requirement: Field boundaries (pivot.geojson) for masking
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/field_tiles_CI/{FIELD}/{DATE}.tif
# - Format: GeoTIFF (5-band: R,G,B,NIR,CI as float32)
# - Also exports: combined_CI/combined_CI_data.rds (wide format: fields × dates)
#
# USAGE:
# Rscript 20_ci_extraction.R [end_date] [offset] [project] [data_source]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction.R 2026-01-02 7 angata merged_tif
#
# PARAMETERS:
# - end_date: End date for processing (character, YYYY-MM-DD format)
# - offset: Days to look back from end_date (numeric, default 7)
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
# - data_source: Data source directory (character, optional) - "merged_tif" (default), "merged_tif_8b", "merged_final_tif"
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - core data processing
# - agronomic_support (AURA): Yes - supports field health monitoring
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate, readxl, furrr, future
# - Utils files: parameters_project.R, 00_common_utils.R, 20_ci_extraction_utils.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Data directories: field_tiles/, field_tiles_CI/, combined_CI/
#
# NOTES:
# - CI formula: (NIR - Red) / (NIR + Red); normalized to 0-5 range
# - 8-band data automatically cloud-masked using UDM2 (band 7-8)
# - 4-band data assumes clear-sky Planet PSScene imagery
# - Parallel processing via furrr for speed optimization
# - Output RDS uses wide format (fields as rows, dates as columns) for growth model
# - Critical dependency for Script 30 (growth model) and Script 80 (KPIs)
#
# RELATED ISSUES:
# SC-112: Utilities restructuring
# SC-108: Core pipeline improvements
#
# ============================================================================
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
# Spatial data handling
library(sf) # For reading/manipulating field boundaries (GeoJSON)
library(terra) # For raster operations (CI extraction from TIFFs)
# Data manipulation
library(tidyverse) # For dplyr, ggplot2, readr (data wrangling and visualization)
library(lubridate) # For date/time operations (parsing satellite dates)
# File I/O
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
library(here) # For relative path resolution (platform-independent file paths)
})
# 2. Process command line arguments
# ------------------------------
# main(): entry point for Script 20 (Canopy Index extraction).
#
# Positional CLI arguments (all optional):
#   args[1] end_date    "YYYY-MM-DD"; absent/invalid -> Sys.Date()
#   args[2] offset      look-back window in days; absent/invalid -> 7
#   args[3] project_dir project name; absent -> global project_dir -> "angata"
#   args[4] data_source input folder selector; absent -> global data_source
#                       -> "merged_tif_8b"
#
# Side effects:
#   - assigns project_dir, data_source, ci_extraction_script and grid_size
#     into .GlobalEnv (the sourced utility files read them from there)
#   - sources parameters_project.R, 00_common_utils.R and
#     20_ci_extraction_utils.R
#   - writes per-date extracted_*.rds files and may rebuild
#     combined_CI_data.rds
#
# NOTE(review): names used below but not defined here (date_list, log_message,
# field_boundaries, field_boundaries_sf, daily_CI_vals_dir,
# cumulative_CI_vals_dir, merged_final, planet_tif_folder, daily_vrt and the
# process_* helpers) are assumed to come from the sourced files — confirm
# against those utilities.
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)
  # Process end_date argument
  if (length(args) >= 1 && !is.na(args[1]) && args[1] != "") {
    # Parse date explicitly in YYYY-MM-DD format from command line
    end_date <- as.Date(args[1], format = "%Y-%m-%d")
    if (is.na(end_date)) {
      warning("Invalid end_date provided. Using default (current date).")
      end_date <- Sys.Date()
      #end_date <- "2023-10-01"
    }
  } else {
    end_date <- Sys.Date()
    #end_date <- "2023-10-01"
  }
  # Process offset argument
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      warning("Invalid offset provided. Using default (7 days).")
      offset <- 7
    }
  } else {
    offset <- 7
  }
  # Process project_dir argument
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "angata" # Fallback default project when nothing is supplied
  }
  # Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif)
  if (length(args) >= 4 && !is.na(args[4])) {
    data_source <- as.character(args[4])
    # Validate data_source is a recognized option
    if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) {
      warning(paste("Data source", data_source, "not in standard list. Using as-is."))
    }
  } else if (exists("data_source", envir = .GlobalEnv)) {
    data_source <- get("data_source", envir = .GlobalEnv)
  } else {
    data_source <- "merged_tif_8b" # Default to 8-band (newer data with cloud masking)
  }
  # Make project_dir and data_source available globally
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("data_source", data_source, envir = .GlobalEnv)
  cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n",
              project_dir, format(end_date, "%Y-%m-%d"), offset, data_source))
  # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
  ci_extraction_script <- TRUE
  assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
  # 3. Initialize project configuration
  # --------------------------------
  # NOTE(review): new_project_question is a local variable, but source()
  # evaluates the sourced file in the global environment by default, so the
  # sourced script cannot see it — confirm whether this flag is still read.
  new_project_question <- TRUE
  cat("[DEBUG] Attempting to source r_app/parameters_project.R\n")
  tryCatch({
    source("r_app/parameters_project.R")
    cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n")
    stop(e)
  })
  # Load centralized path structure (creates all directories automatically)
  paths <- setup_project_directories(project_dir)
  cat("[DEBUG] Attempting to source r_app/00_common_utils.R\n")
  tryCatch({
    source("r_app/00_common_utils.R")
    cat("[DEBUG] Successfully sourced r_app/00_common_utils.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/00_common_utils.R:\n", e$message, "\n")
    stop(e)
  })
  cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n")
  tryCatch({
    source("r_app/20_ci_extraction_utils.R")
    cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n")
  }, error = function(e) {
    cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n")
    stop(e)
  })
  # 4. Generate date list for processing
  # ---------------------------------
  dates <- date_list(end_date, offset)
  log_message(paste("Processing data for week", dates$week, "of", dates$year))
  # 4a. CHECK DAILY CI EXTRACTION - Skip dates that already have extracted files
  # -------------------------------------------------------------------------
  log_message("\n===== CHECKING DAILY CI EXTRACTION STATUS =====")
  # Check which dates already have extracted CI files
  already_extracted <- c()
  missing_extraction <- c()
  if (dir.exists(daily_CI_vals_dir)) {
    existing_ci_files <- list.files(daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$")
    # Extract dates from filenames like "extracted_2025-12-31_quadrant.rds"
    # NOTE(review): the greedy (.+) captures everything up to the LAST "_";
    # correct for "extracted_<date>_<suffix>.rds", wrong if a suffix ever
    # contains another underscore.
    already_extracted <- sub("^extracted_(.+)_.*\\.rds$", "\\1", existing_ci_files)
  }
  # Find which dates in our processing range need extraction
  missing_extraction <- dates$days_filter[!(dates$days_filter %in% already_extracted)]
  cat(sprintf("[CI CHECK] Already extracted: %d dates\n", length(already_extracted)))
  cat(sprintf("[CI CHECK] Need extraction: %d dates (from %s to %s)\n",
              length(missing_extraction),
              if(length(missing_extraction) > 0) min(missing_extraction) else "N/A",
              if(length(missing_extraction) > 0) max(missing_extraction) else "N/A"))
  # If any dates need extraction, we'll extract them
  # If NO dates need extraction, we'll skip extraction but ALWAYS rebuild combined_CI_data.rds
  skip_extraction <- (length(missing_extraction) == 0)
  if (skip_extraction) {
    log_message("✓ All dates in processing range already have extracted CI files - skipping extraction")
    log_message("⚠ Will rebuild combined_CI_data.rds to ensure completeness")
  }
  # 4b. CHECK SOURCE DATA AVAILABILITY
  # ---------------------------------------------------------------
  # Verify that source data exists for dates we're going to extract
  # If a date is missing from source, we'll skip it gracefully
  log_message("\n===== CHECKING SOURCE DATA AVAILABILITY =====")
  dates_with_source <- c()
  dates_missing_source <- c()
  if (!skip_extraction && length(missing_extraction) > 0) {
    # Check which source dates are actually available
    for (date_str in missing_extraction) {
      # Look for the date in merged_tif directory
      source_file_pattern <- sprintf("%s\\.tif$", date_str)
      files_for_date <- list.files(planet_tif_folder, pattern = source_file_pattern)
      if (length(files_for_date) > 0) {
        dates_with_source <- c(dates_with_source, date_str)
      } else {
        dates_missing_source <- c(dates_missing_source, date_str)
      }
    }
    cat(sprintf("[SOURCE CHECK] Dates with available source data: %d\n", length(dates_with_source)))
    cat(sprintf("[SOURCE CHECK] Dates missing from source (will skip): %d\n", length(dates_missing_source)))
    if (length(dates_missing_source) > 0) {
      log_message(paste("⚠ Skipping extraction for missing source dates:", paste(dates_missing_source, collapse = ", ")))
    }
  }
  # 5. Find and filter raster files by date - with grid size detection
  # -----------------------------------
  log_message("Searching for raster files")
  # Check if tiles exist (Script 10 output) - detect grid size dynamically using centralized paths
  tiles_split_base <- paths$daily_tiles_split_dir
  # Detect grid size from daily_tiles_split folder structure
  # Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc.
  grid_size <- NA
  if (dir.exists(tiles_split_base)) {
    subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
    # Look for grid size patterns like "5x5", "10x10", "20x20"
    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
    if (length(grid_patterns) > 0) {
      grid_size <- grid_patterns[1] # Use first grid size found
      log_message(paste("Detected grid size:", grid_size))
    }
  }
  # Construct tile folder path with grid size
  if (!is.na(grid_size)) {
    tile_folder <- file.path(tiles_split_base, grid_size)
  } else {
    tile_folder <- tiles_split_base
  }
  use_tiles <- dir.exists(tile_folder)
  # Make grid_size available globally for other functions
  assign("grid_size", grid_size, envir = .GlobalEnv)
  tryCatch({
    if (skip_extraction) {
      log_message("\n===== SKIPPING CI EXTRACTION (all dates already processed) =====")
    } else if (use_tiles) {
      # Use tile-based processing
      log_message(paste("Tile folder detected at", tile_folder))
      log_message("Using tile-based CI extraction")
      # Call the tile-based extraction function
      process_ci_values_from_tiles(
        dates = dates,
        tile_folder = tile_folder,
        field_boundaries = field_boundaries,
        field_boundaries_sf = field_boundaries_sf,
        daily_CI_vals_dir = daily_CI_vals_dir,
        cumulative_CI_vals_dir = cumulative_CI_vals_dir,
        merged_final_dir = merged_final,
        grid_size = grid_size
      )
    } else {
      # Use legacy full-extent processing
      log_message("No tiles found. Using legacy full-extent approach")
      # Use the existing utility function to find satellite images
      existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
      log_message(paste("Found", length(existing_files), "raster files for processing"))
      # Process raster files and create VRT
      # NOTE(review): the returned list is not used afterwards — apparently
      # kept for the helper's side effects (files written); confirm.
      vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
      # Process and combine CI values
      process_ci_values(dates, field_boundaries, merged_final,
                        field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
    }
  }, error = function(e) {
    log_message(paste("Error in main processing:", e$message), level = "ERROR")
    stop(e$message)
  })
  # 6. REBUILD combined_CI_data.rds from ALL daily extracted files
  # -----------------------------------------------
  # This ensures the combined file is complete and up-to-date
  # even if extraction was skipped (because dates already existed)
  # NOTE: Only rebuild if new dates were successfully extracted
  # If all dates were missing from source, skip this step to avoid corrupting the file
  log_message("\n===== HANDLING combined_CI_data.rds =====")
  if (length(dates_with_source) == 0 && length(missing_extraction) > 0) {
    # All missing dates had no source data - skip combined_CI_data.rds update
    log_message("⚠ No new dates extracted (all source data missing) - skipping combined_CI_data.rds update")
  } else if (skip_extraction) {
    # All dates already extracted - optionally rebuild for consistency
    log_message("✓ All dates already extracted - combined_CI_data.rds is up-to-date")
  } else {
    # New dates were extracted - rebuild combined_CI_data.rds from ALL daily files
    log_message("Rebuilding combined_CI_data.rds from all daily extracted files...")
    tryCatch({
      if (!dir.exists(daily_CI_vals_dir)) {
        log_message("Daily CI directory does not exist yet", level = "WARNING")
      } else {
        # List ALL daily CI files (not just new ones)
        all_daily_files <- list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE)
        if (length(all_daily_files) == 0) {
          log_message("No daily CI files found to combine", level = "WARNING")
        } else {
          log_message(paste("Combining all", length(all_daily_files), "daily CI files into combined_CI_data.rds"))
          # Load and combine ALL daily files (creates complete dataset)
          combined_ci_path <- file.path(paths$cumulative_ci_vals_dir, "combined_CI_data.rds")
          # NOTE(review): the result is saved while still grouped by
          # sub_field — confirm downstream readers expect a grouped tibble.
          combined_data <- all_daily_files %>%
            purrr::map(readRDS) %>%
            purrr::list_rbind() %>%
            dplyr::group_by(sub_field)
          # Save the rebuilt combined data
          saveRDS(combined_data, combined_ci_path)
          log_message(paste("✓ Rebuilt combined_CI_data.rds with", nrow(combined_data), "total rows"))
        }
      }
    }, error = function(e) {
      log_message(paste("⚠ Error rebuilding combined_CI_data.rds (will skip):", e$message), level = "WARNING")
      log_message(" Note: This is OK - Script 30 will use growth model RDS instead", level = "WARNING")
    })
  }
}
# Run only when executed directly via Rscript (no enclosing call frames);
# when this file is source()'d from another script, main() is not invoked.
if (sys.nframe() == 0) main()

View file

@ -80,9 +80,18 @@ main <- function() {
}) })
# Get list of dates to process # Get list of dates to process
dates <- date_list(end_date, offset) # If in migration mode, dates_to_process is provided by the pipeline runner
safe_log(sprintf("Processing dates: %s to %s (%d dates)", if (exists("dates_to_process") && !is.null(dates_to_process)) {
dates$start_date, dates$end_date, length(dates$days_filter))) # Migration mode: Use provided list of dates (process ALL available dates)
dates_filter <- sort(dates_to_process)
safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
} else {
# Normal mode: Use 7-day offset window
dates <- date_list(end_date, offset)
dates_filter <- dates$days_filter
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
dates$start_date, dates$end_date, length(dates_filter)))
}
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir)) safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir)) safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
@ -123,7 +132,7 @@ main <- function() {
total_error <- 0 total_error <- 0
ci_results_by_date <- list() ci_results_by_date <- list()
for (date_str in dates$days_filter) { for (date_str in dates_filter) {
# Load the merged TIFF ONCE for this date # Load the merged TIFF ONCE for this date
merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str)) merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str))

View file

@ -7,9 +7,9 @@
# models and Python ML workflows without requiring interpolated/modeled values. # models and Python ML workflows without requiring interpolated/modeled values.
# #
# INPUT DATA: # INPUT DATA:
# - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds # - Source: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
# - Format: RDS (wide format: fields × dates with CI values) # - Format: RDS (interpolated growth model data from Script 30)
# - Requirement: Script 20 must have completed CI extraction # - Requirement: Script 30 must have completed growth model interpolation
# #
# OUTPUT DATA: # OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ # - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
@ -36,12 +36,12 @@
# - Data directories: extracted_ci/cumulative_vals/ # - Data directories: extracted_ci/cumulative_vals/
# #
# NOTES: # NOTES:
# - Transformation: Wide format (fields as rows, dates as columns) → Long format # - Data source: Uses interpolated CI data from Script 30 (growth model output)
# - Time series: Preserves all CI values without interpolation # - Handles both wide format and long format inputs from growth model
# - DOY (Day of Year): Calculated from date for seasonal analysis # - DOY (Day of Year): Calculated from date for seasonal analysis
# - Python integration: CSV format compatible with pandas/scikit-learn workflows # - Python integration: CSV format compatible with pandas/scikit-learn workflows
# - Used by: Python harvest detection models (harvest_date_prediction.py) # - Used by: Python harvest detection models (harvest_date_prediction.py)
# - Optional: Run only when exporting to Python for ML model training # - Exports complete growth curves with interpolated values for ML training
# #
# RELATED ISSUES: # RELATED ISSUES:
# SC-112: Utilities restructuring # SC-112: Utilities restructuring
@ -199,39 +199,56 @@ main <- function() {
ci_data_source_dir <- paths$cumulative_ci_vals_dir ci_data_source_dir <- paths$cumulative_ci_vals_dir
ci_data_output_dir <- paths$ci_for_python_dir ci_data_output_dir <- paths$ci_for_python_dir
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds") # Try to load interpolated growth model data from Script 30
input_file <- file.path(ci_data_source_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv") output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
# Check if input file exists # Check if input file exists
if (!file.exists(input_file)) { if (!file.exists(input_file)) {
stop(paste("Input file not found:", input_file)) stop(paste("Input file not found:", input_file,
"\nScript 30 (growth model) must be run before Script 21."))
} }
cat(sprintf("Loading: %s\n", input_file)) cat(sprintf("Loading: %s\n", input_file))
# Load RDS file # Load RDS file (from Script 30 - already in long format with interpolated values)
ci_data_wide <- readRDS(input_file) %>% ci_data <- readRDS(input_file) %>%
as_tibble() as_tibble()
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide))) cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n")) cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
# Step 1: Convert from WIDE to LONG format # Check format and prepare for export
cat("\nStep 1: Converting from wide to long format...\n") # If it's already in long format (from Script 30), use as-is
ci_data_long <- wide_to_long_ci_data(ci_data_wide) # Otherwise, convert from wide to long
if ("Date" %in% names(ci_data) || "date" %in% names(ci_data)) {
cat(" Detected: LONG format (from growth model)\n")
ci_data_long <- ci_data
} else {
cat(" Detected: WIDE format - converting to long...\n")
ci_data_long <- wide_to_long_ci_data(ci_data)
}
# Step 2: Create complete daily sequences with interpolation # Step 1: Ensure Date column exists and is properly formatted
cat("\nStep 2: Creating complete daily sequences with interpolation...\n") ci_data_long <- ci_data_long %>%
ci_data_python <- create_interpolated_daily_sequences(ci_data_long) mutate(
Date = as.Date(Date)
)
# Step 3: Validate output # Step 2: If interpolated values already present, use them; otherwise create interpolated sequences
cat("\nStep 3: Validating output...") if ("value" %in% names(ci_data_long)) {
validate_conversion_output(ci_data_python) # Already has interpolated values from Script 30
cat("\nStep 2: Using interpolated values from growth model...\n")
ci_data_python <- ci_data_long
} else {
# Create interpolated daily sequences
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
}
# Step 4: Save to CSV # Step 4: Save to CSV
cat(sprintf("\nStep 4: Saving to CSV...\n")) cat(sprintf("\nStep 4: Saving to CSV...\n"))
cat(sprintf(" Output: %s\n", output_file)) cat(sprintf(" Output: %s\n", output_file))
write_csv(ci_data_python, output_file) write_csv(ci_data_python, output_file)
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python))) cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))

View file

@ -115,15 +115,16 @@ load_combined_ci_data <- function(daily_vals_dir) {
#' @param harvesting_data Dataframe with harvesting information #' @param harvesting_data Dataframe with harvesting information
#' @param field_CI_data Dataframe with CI measurements #' @param field_CI_data Dataframe with CI measurements
#' @param season Year of the growing season #' @param season Year of the growing season
#' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration.
#' @return Dataframe with interpolated daily CI values #' @return Dataframe with interpolated daily CI values
#' #'
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) { extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) {
# Filter harvesting data for the given season and field name # Filter harvesting data for the given season and field name
filtered_harvesting_data <- harvesting_data %>% filtered_harvesting_data <- harvesting_data %>%
dplyr::filter(year == season, sub_field == field_name) dplyr::filter(year == season, sub_field == field_name)
if (nrow(filtered_harvesting_data) == 0) { if (nrow(filtered_harvesting_data) == 0) {
safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
return(data.frame()) return(data.frame())
} }
@ -133,7 +134,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
# Return an empty data frame if no CI data is found # Return an empty data frame if no CI data is found
if (nrow(filtered_field_CI_data) == 0) { if (nrow(filtered_field_CI_data) == 0) {
safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
return(data.frame()) return(data.frame())
} }
@ -157,12 +158,14 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
# If CI is empty after filtering, return an empty dataframe # If CI is empty after filtering, return an empty dataframe
if (nrow(CI) == 0) { if (nrow(CI) == 0) {
safe_log(paste0("No CI data within season dates for field: ", field_name, if (verbose) {
" (Season: ", season, ", dates: ", safe_log(paste0("No CI data within season dates for field: ", field_name,
format(season_start, "%Y-%m-%d"), " to ", " (Season: ", season, ", dates: ",
format(season_end, "%Y-%m-%d"), format(season_start, "%Y-%m-%d"), " to ",
"). Available CI data range: ", ci_date_range), format(season_end, "%Y-%m-%d"),
"WARNING") "). Available CI data range: ", ci_date_range),
"WARNING")
}
return(data.frame()) return(data.frame())
} }
@ -175,20 +178,17 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
subField = field_name subField = field_name
) )
# Log successful interpolation # Return data with success status
safe_log(paste0("Successfully interpolated CI data for field: ", field_name,
" (Season: ", season, ", dates: ",
format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"). ", nrow(CI), " data points created."))
return(CI) return(CI)
}, error = function(e) { }, error = function(e) {
safe_log(paste0("Error interpolating CI data for field ", field_name, # Return empty dataframe on error (will be tracked separately)
" in season ", season, if (verbose) {
" (", format(season_start, "%Y-%m-%d"), " to ", safe_log(paste0("Error interpolating CI data for field ", field_name,
format(season_end, "%Y-%m-%d"), " in season ", season,
"): ", e$message), "ERROR") " (", format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"): ", e$message), "ERROR")
}
return(data.frame()) return(data.frame())
}) })
} }
@ -203,17 +203,19 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season)
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
safe_log("Starting CI data interpolation for all fields") safe_log("Starting CI data interpolation for all fields")
# Track failed fields for end-of-run summary
failed_fields <- list()
total_fields <- 0
successful_fields <- 0
# Process each year # Process each year
result <- purrr::map_df(years, function(yr) { result <- purrr::map_df(years, function(yr) {
safe_log(paste("Processing year:", yr))
# Get the fields harvested in this year with valid season start dates # Get the fields harvested in this year with valid season start dates
sub_fields <- harvesting_data %>% sub_fields <- harvesting_data %>%
dplyr::filter(year == yr, !is.na(season_start)) %>% dplyr::filter(year == yr, !is.na(season_start)) %>%
dplyr::pull(sub_field) dplyr::pull(sub_field)
if (length(sub_fields) == 0) { if (length(sub_fields) == 0) {
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
return(data.frame()) return(data.frame())
} }
@ -222,24 +224,64 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
purrr::keep(~ any(ci_data$sub_field == .x)) purrr::keep(~ any(ci_data$sub_field == .x))
if (length(valid_sub_fields) == 0) { if (length(valid_sub_fields) == 0) {
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
return(data.frame()) return(data.frame())
} }
# Extract and interpolate data for each valid field # Initialize progress bar for this year
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr)) total_fields <<- total_fields + length(valid_sub_fields)
pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50)
counter <- 0
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x, # Extract and interpolate data for each valid field with progress bar
harvesting_data = harvesting_data, result_list <- list()
field_CI_data = ci_data, for (field in valid_sub_fields) {
season = yr)) %>% counter <- counter + 1
purrr::list_rbind() setTxtProgressBar(pb, counter)
# Call with verbose=FALSE to suppress warnings during progress bar iteration
field_result <- extract_CI_data(field,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr,
verbose = FALSE)
if (nrow(field_result) > 0) {
successful_fields <<- successful_fields + 1
result_list[[field]] <- field_result
} else {
# Track failed field
failed_fields[[length(failed_fields) + 1]] <<- list(
field = field,
season = yr,
reason = "Unable to generate interpolated data"
)
}
}
close(pb)
cat("\n") # Newline after progress bar
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr)) # Combine all results for this year
return(result) if (length(result_list) > 0) {
purrr::list_rbind(result_list)
} else {
data.frame()
}
}) })
safe_log(paste("Total interpolated data points:", nrow(result))) # Print summary
safe_log(sprintf("\n=== Interpolation Summary ==="))
safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields))
if (length(failed_fields) > 0) {
safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields)))
for (failure in failed_fields) {
safe_log(sprintf(" - Field %s (Season %d): %s",
failure$field, failure$season, failure$reason), "WARNING")
}
}
safe_log(sprintf("Total interpolated data points: %d", nrow(result)))
return(result) return(result)
} }

View file

@ -1,296 +0,0 @@
# ============================================================================
# SCRIPT 40: Weekly Mosaic Creation (CI Band Aggregation)
# ============================================================================
# PURPOSE:
# Create weekly 5-band (R, G, B, NIR, CI) mosaics from daily satellite
# imagery. Aggregates multi-day CI data into single weekly composite raster
# for field-level analysis. Supports per-field or single-file architectures.
#
# INPUT DATA:
# - Daily per-field TIFFs: laravel_app/storage/app/{project}/daily_tiles/{YYYY-MM-DD}/*.tif
# (or single-file mosaics: merged_tif/{YYYY-MM-DD}.tif + pivot.geojson masking)
# - CI data (RDS): laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
# - Field boundaries: laravel_app/storage/app/{project}/pivot.geojson
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/weekly_mosaic/
# - Format: 5-band GeoTIFF (uint16)
# - Naming: week_{WW}_{YYYY}.tif (week number + year, e.g., week_35_2025.tif)
# - Spatial: Raster aligned to field boundaries; CRS preserved
#
# USAGE:
# Rscript 40_mosaic_creation.R [end_date] [offset] [project] [file_name] [data_source]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 aura
#
# PARAMETERS:
# - end_date: End date (YYYY-MM-DD format); required for weekly aggregation
# - offset: Days to look back (typically 7 for one week)
# - project: Project name (aura, angata, chemba, xinavane, esa, simba)
# - file_name: Custom output filename (optional; default: week_{WW}_{YYYY}.tif)
# - data_source: Data folder (optional; auto-detects merged_tif or merged_tif_8b)
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - harvest readiness timeline depends on weekly mosaic
# - agronomic_support (AURA): Yes - KPI calculation requires weekly CI bands
#
# DEPENDENCIES:
# - Packages: sf, terra, tidyverse, lubridate, here
# - Utils files: parameters_project.R, 00_common_utils.R, 40_mosaic_creation_utils.R
# - Input data: Daily per-field TIFFs (Script 10) + CI extraction (Script 20)
# - Data: field boundaries (pivot.geojson), harvest dates (if available)
#
# NOTES:
# - Weekly aggregation: Combines 7 days of daily data into single composite
# - 5-band output: R, G, B, NIR, and Canopy Index (CI) derived from NDVI
# - Tiling support: Handles per-field TIFF architecture; auto-mosaics if needed
# - Data source auto-detection: Searches merged_tif/ or merged_tif_8b/ folders
# - Command-line driven: Designed for batch scheduling (cron/Task Scheduler)
# - Downstream: Script 80 (KPI calculation) depends on weekly_mosaic/ output
# - Performance: Multi-file mosaicing (~25 fields) takes 5-10 minutes per week
#
# RELATED ISSUES:
# SC-113: Script header standardization
# SC-112: Utilities restructuring
# SC-111: Script 10 geometry validation
#
# ============================================================================
# 1. Load required packages
# -------------------------
# NOTE(review): library() order determines name masking (terra and the
# tidyverse export overlapping names such as extract); tidyverse is loaded
# after terra here, so its versions win - confirm this is intentional.
suppressPackageStartupMessages({
  # File path handling
  library(here)      # For relative path resolution (platform-independent file paths)
  # Spatial data handling
  library(sf)        # For spatial operations (field boundary masking)
  library(terra)     # For raster operations (reading/writing/stacking GeoTIFFs)
  # Data manipulation
  library(tidyverse) # For dplyr, readr (data wrangling)
  library(lubridate) # For date/time operations (week extraction, date formatting)
})
# 2. Process command line arguments and run mosaic creation
# ------------------------------------------------------
# Script entry point for Script 40 (weekly mosaic creation).
# Parses CLI arguments (end_date, offset, project, file_name, data_source),
# loads the centralized project configuration, then routes to either the
# tile-based or the single-file weekly mosaic workflow.
# Side effects: writes GeoTIFFs under weekly_tile_max/ or weekly_mosaic/,
# and assigns project_dir / data_source into the global environment so the
# sourced parameters_project.R can read them.
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)
  # --- Argument 3: project directory (default "angata") ---
  if (length(args) >= 3 && !is.na(args[3])) {
    project_dir <- as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    # Fall back to a pre-set global (e.g. when driven by a pipeline runner)
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    # Default project directory
    project_dir <- "angata"
    message("No project_dir provided. Using default:", project_dir)
  }
  # Make project_dir available globally so parameters_project.R can use it
  assign("project_dir", project_dir, envir = .GlobalEnv)
  # --- Argument 1: end date (YYYY-MM-DD, default today) ---
  if (length(args) >= 1 && !is.na(args[1])) {
    # Parse date explicitly in YYYY-MM-DD format from command line
    end_date <- as.Date(args[1], format = "%Y-%m-%d")
    if (is.na(end_date)) {
      message("Invalid end_date provided. Using current date.")
      end_date <- Sys.Date()
    }
  } else if (exists("end_date_str", envir = .GlobalEnv)) {
    end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
  } else {
    # Default to current date if no argument is provided
    end_date <- Sys.Date()
    message("No end_date provided. Using current date: ", format(end_date))
  }
  # --- Argument 2: look-back offset in days (default 7) ---
  if (length(args) >= 2 && !is.na(args[2])) {
    offset <- as.numeric(args[2])
    if (is.na(offset) || offset <= 0) {
      message("Invalid offset provided. Using default (7 days).")
      offset <- 7
    }
  } else {
    # Default to 7 days if no argument is provided
    offset <- 7
    message("No offset provided. Using default:", offset, "days")
  }
  # --- Argument 5: explicit data source folder (optional) ---
  # If provided, use it; otherwise auto-detect
  data_source_from_args <- NULL
  if (length(args) >= 5 && !is.na(args[5]) && nchar(args[5]) > 0) {
    data_source_from_args <- as.character(args[5])
    message("Data source explicitly provided via arguments: ", data_source_from_args)
  }
  # 3. Initialize project configuration
  # --------------------------------
  # Detect which data source directory exists (merged_tif or merged_tif_8b)
  # IMPORTANT: Only consider a folder as valid if it contains actual files
  laravel_storage <- here::here("laravel_app/storage/app", project_dir)
  # Load centralized path structure
  tryCatch({
    source("r_app/parameters_project.R")
    paths <- setup_project_directories(project_dir)
  }, error = function(e) {
    message("Note: Could not open files from r_app directory")
    message("Attempting to source from default directory instead...")
    tryCatch({
      source("parameters_project.R")
      # NOTE(review): this assignment happens inside the error handler's own
      # function environment, so `paths` will NOT be visible in main() when
      # the fallback path is taken - confirm whether this branch ever runs.
      paths <- setup_project_directories(project_dir)
      message("✓ Successfully sourced files from default directory")
    }, error = function(e) {
      stop("Failed to source required files from both 'r_app' and default directories.")
    })
  })
  # NOTE(review): has_8b_data / has_legacy_data are used below but their
  # detection code is not visible in this rendering of the file; this block
  # also appears to sit inside an `if` whose opening line is missing (see the
  # unmatched closing brace after the else chain). Verify against the
  # original script before editing.
  data_source <- if (has_8b_data) {
    message("Auto-detected data source: merged_tif_8b (8-band optimized) - contains files")
    "merged_tif_8b"
  } else if (has_legacy_data) {
    message("Auto-detected data source: merged_tif (legacy 4-band) - contains files")
    "merged_tif"
  } else {
    message("Warning: No valid data source found (both folders empty or missing). Using default: merged_tif")
    "merged_tif"
  }
  } # NOTE(review): unmatched closing brace - its opening `if` seems lost in this render
  # Set global data_source for parameters_project.R
  assign("data_source", data_source, envir = .GlobalEnv)
  # Source utils; retry from the working directory if r_app/ is not found
  tryCatch({
    source("r_app/parameters_project.R")
    source("r_app/00_common_utils.R")
    source("r_app/40_mosaic_creation_utils.R")
    safe_log(paste("Successfully sourced files from 'r_app' directory."))
  }, error = function(e) {
    message("Note: Could not open files from r_app directory")
    message("Attempting to source from default directory instead...")
    tryCatch({
      source("parameters_project.R")
      paths <- setup_project_directories(project_dir)
      message("✓ Successfully sourced files from default directory")
    }, error = function(e) {
      stop("Failed to source required files from both 'r_app' and default directories.")
    })
  })
  # Use centralized paths (no need to manually construct or create dirs)
  merged_final <- paths$growth_model_interpolated_dir # or merged_final_tif if needed
  daily_vrt <- paths$vrt_dir
  safe_log(paste("Using growth model/mosaic directory:", merged_final))
  safe_log(paste("Using daily VRT directory:", daily_vrt))
  # 4. Generate date range for processing
  # ---------------------------------
  dates <- date_list(end_date, offset)
  safe_log(paste("Processing data for week", dates$week, "of", dates$year))
  # Create output filename
  # Only use custom filename if explicitly provided (not empty string)
  file_name_tif <- if (length(args) >= 4 && !is.na(args[4]) && nchar(args[4]) > 0) {
    as.character(args[4])
  } else {
    paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
  }
  safe_log(paste("Output will be saved as:", file_name_tif))
  # 5. Create weekly mosaics - route based on project tile detection
  # ---------------------------------------------------------------
  # The use_tile_mosaic flag is auto-detected by parameters_project.R
  # based on whether tiles exist in merged_final_tif/
  if (!exists("use_tile_mosaic")) {
    # Fallback detection if flag not set (shouldn't happen)
    merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
    tile_detection <- detect_tile_structure_from_merged_final(merged_final_dir)
    use_tile_mosaic <- tile_detection$has_tiles
  }
  if (use_tile_mosaic) {
    # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
    # This is used for projects like Angata with large ROIs requiring spatial partitioning
    # Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20)
    tryCatch({
      safe_log("Starting per-tile mosaic creation (tile-based approach)...")
      # Detect grid size from merged_final_tif folder structure
      # Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc.
      merged_final_base <- file.path(laravel_storage, "merged_final_tif")
      grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE)
      # Look for grid size patterns like "5x5", "10x10", "20x20"
      grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
      if (length(grid_patterns) == 0) {
        stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)")
      }
      grid_size <- grid_patterns[1] # Use first grid size found
      safe_log(paste("Detected grid size:", grid_size))
      # Point to the grid-specific merged_final_tif directory
      merged_final_with_grid <- file.path(merged_final_base, grid_size)
      # Set output directory for per-tile mosaics, organized by grid size (from centralized paths)
      # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif
      tile_output_base <- file.path(paths$weekly_tile_max_dir, grid_size)
      # Note: no dir.create needed - paths$weekly_tile_max_dir already created by setup_project_directories()
      dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE) # Create grid-size subfolder
      # NOTE(review): field_boundaries is not defined anywhere in this
      # function - presumably created by a sourced script; confirm.
      created_tile_files <- create_weekly_mosaic_from_tiles(
        dates = dates,
        merged_final_dir = merged_final_with_grid,
        tile_output_dir = tile_output_base,
        field_boundaries = field_boundaries
      )
      safe_log(paste("✓ Per-tile mosaic creation completed - created",
                     length(created_tile_files), "tile files"))
    }, error = function(e) {
      safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
      traceback()
      stop("Mosaic creation failed")
    })
  } else {
    # SINGLE-FILE APPROACH: Create single weekly mosaic file
    # This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output
    tryCatch({
      safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
      # Set output directory for single-file mosaics (from centralized paths)
      single_file_output_dir <- paths$weekly_mosaic_dir
      created_file <- create_weekly_mosaic(
        dates = dates,
        field_boundaries = field_boundaries,
        daily_vrt_dir = daily_vrt,
        merged_final_dir = merged_final,
        output_dir = single_file_output_dir,
        file_name_tif = file_name_tif,
        create_plots = FALSE
      )
      safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
    }, error = function(e) {
      safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
      traceback()
      stop("Mosaic creation failed")
    })
  }
}
# Run main() only when no calling frames exist on the stack, i.e. when this
# script is launched directly (Rscript) rather than invoked from another
# function or a source() call.
if (sys.nframe() == 0L) main()

View file

@ -165,6 +165,13 @@ main <- function() {
dates <- date_list(end_date, offset) dates <- date_list(end_date, offset)
# Validate week calculation
message(sprintf("[INFO] Requested offset: %d days", offset))
message(sprintf("[INFO] End date: %s", format(end_date, "%Y-%m-%d")))
message(sprintf("[INFO] Start date: %s", format(dates$start_date, "%Y-%m-%d")))
message(sprintf("[INFO] Calculating ISO week: %d", dates$week))
message(sprintf("[INFO] Calculating ISO year: %d", dates$year))
# ==== Create Per-Field Weekly Mosaics ==== # ==== Create Per-Field Weekly Mosaics ====
created_files <- create_all_field_weekly_mosaics( created_files <- create_all_field_weekly_mosaics(

View file

@ -42,6 +42,15 @@ date_list <- function(end_date, offset) {
week <- lubridate::isoweek(end_date) week <- lubridate::isoweek(end_date)
year <- lubridate::isoyear(end_date) year <- lubridate::isoyear(end_date)
# Validate: Check that all dates in range belong to same ISO week
start_week <- lubridate::isoweek(start_date)
start_year <- lubridate::isoyear(start_date)
if (start_week != week || start_year != year) {
safe_log(sprintf("WARNING: Date range spans multiple ISO weeks! Start: week %d/%d, End: week %d/%d. Using END date week %d/%d.",
start_week, start_year, week, year, week, year), "WARNING")
}
days_filter <- seq(from = start_date, to = end_date, by = "day") days_filter <- seq(from = start_date, to = end_date, by = "day")
days_filter <- format(days_filter, "%Y-%m-%d") days_filter <- format(days_filter, "%Y-%m-%d")
@ -117,7 +126,6 @@ find_per_field_tiffs_for_week <- function(field_tiles_ci_dir, days_filter) {
create_field_weekly_composite <- function(tiff_files, field_name) { create_field_weekly_composite <- function(tiff_files, field_name) {
if (length(tiff_files) == 0) { if (length(tiff_files) == 0) {
safe_log(paste("No TIFF files for field:", field_name), "WARNING")
return(NULL) return(NULL)
} }
@ -129,35 +137,30 @@ create_field_weekly_composite <- function(tiff_files, field_name) {
r <- terra::rast(file) r <- terra::rast(file)
rasters[[length(rasters) + 1]] <- r rasters[[length(rasters) + 1]] <- r
}, error = function(e) { }, error = function(e) {
safe_log(paste("Warning: Could not load", basename(file), "for field", field_name), "WARNING") # Silently skip load errors (they're already counted)
}) })
} }
if (length(rasters) == 0) { if (length(rasters) == 0) {
safe_log(paste("Failed to load any rasters for field:", field_name), "ERROR")
return(NULL) return(NULL)
} }
# Create MAX composite # Create MAX composite
if (length(rasters) == 1) { if (length(rasters) == 1) {
composite <- rasters[[1]] composite <- rasters[[1]]
safe_log(paste(" Field", field_name, "- single day (no compositing needed)"))
} else { } else {
# Stack all rasters and apply MAX per pixel per band # Stack all rasters and apply MAX per pixel per band
collection <- terra::sprc(rasters) collection <- terra::sprc(rasters)
composite <- terra::mosaic(collection, fun = "max") composite <- terra::mosaic(collection, fun = "max")
safe_log(paste(" Field", field_name, "- MAX composite from", length(rasters), "days"))
} }
# Ensure 5 bands with expected names # Ensure 5 bands with expected names
if (terra::nlyr(composite) >= 5) { if (terra::nlyr(composite) >= 5) {
composite <- terra::subset(composite, 1:5) composite <- terra::subset(composite, 1:5)
names(composite) <- c("Red", "Green", "Blue", "NIR", "CI") names(composite) <- c("Red", "Green", "Blue", "NIR", "CI")
} else {
safe_log(paste("Warning: Field", field_name, "has", terra::nlyr(composite),
"bands (expected 5)"), "WARNING")
} }
return(composite) return(composite)
}, error = function(e) { }, error = function(e) {
@ -190,11 +193,9 @@ save_field_weekly_mosaic <- function(raster, output_dir, field_name, week, year)
filename <- sprintf("week_%02d_%04d.tif", week, year) filename <- sprintf("week_%02d_%04d.tif", week, year)
file_path <- file.path(field_output_dir, filename) file_path <- file.path(field_output_dir, filename)
# Save raster # Save raster (silently)
terra::writeRaster(raster, file_path, overwrite = TRUE) terra::writeRaster(raster, file_path, overwrite = TRUE)
safe_log(paste(" Saved:", basename(field_output_dir), "/", filename))
return(file_path) return(file_path)
}, error = function(e) { }, error = function(e) {
@ -229,8 +230,13 @@ create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_di
created_files <- character() created_files <- character()
# Initialize progress bar
pb <- txtProgressBar(min = 0, max = length(field_tiffs), style = 3, width = 50)
counter <- 0
# Process each field # Process each field
for (field_name in names(field_tiffs)) { for (field_name in names(field_tiffs)) {
counter <- counter + 1
tiff_files <- field_tiffs[[field_name]] tiff_files <- field_tiffs[[field_name]]
# Create composite # Create composite
@ -250,8 +256,12 @@ create_all_field_weekly_mosaics <- function(dates, field_tiles_ci_dir, output_di
created_files <- c(created_files, saved_path) created_files <- c(created_files, saved_path)
} }
} }
setTxtProgressBar(pb, counter)
} }
close(pb)
cat("\n") # New line after progress bar
safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics")) safe_log(paste("✓ Completed: Created", length(created_files), "weekly field mosaics"))
return(created_files) return(created_files)

View file

@ -48,10 +48,6 @@
# - Critical dependency for Scripts 90/91 (reporting/dashboards) # - Critical dependency for Scripts 90/91 (reporting/dashboards)
# - Uses Moran's I for spatial clustering detection (weed/stress patterns) # - Uses Moran's I for spatial clustering detection (weed/stress patterns)
# #
# RELATED ISSUES:
# SC-112: Script 80 utilities restructuring (common + client-aware modules)
# SC-108: Core pipeline improvements
# SC-100: KPI definition and formula documentation
# #
# ============================================================================ # ============================================================================
# [✓] Extract planting dates per field # [✓] Extract planting dates per field
@ -320,7 +316,6 @@ main <- function() {
message("Output Formats:", paste(client_config$outputs, collapse = ", ")) message("Output Formats:", paste(client_config$outputs, collapse = ", "))
# Use centralized paths from setup object (no need for file.path calls) # Use centralized paths from setup object (no need for file.path calls)
weekly_tile_max <- setup$weekly_tile_max_dir
weekly_mosaic <- setup$weekly_mosaic_dir weekly_mosaic <- setup$weekly_mosaic_dir
daily_vals_dir <- setup$daily_ci_vals_dir daily_vals_dir <- setup$daily_ci_vals_dir
@ -394,96 +389,66 @@ main <- function() {
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70)) message(strrep("=", 70))
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
reports_dir <- setup$kpi_reports_dir
data_dir <- setup$data_dir
# Continue with existing per-field analysis code below # Continue with existing per-field analysis code below
message("\n", strrep("-", 70)) message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
message(strrep("-", 70)) message(strrep("-", 70))
current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53) current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53)
year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming
# Calculate previous week using authoritative helper (handles year boundaries correctly) # Calculate previous week using authoritative helper (handles year boundaries correctly)
source("r_app/80_weekly_stats_utils.R") # Load helper function # Function already loaded from 80_utils_common.R sourced earlier
previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1) previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1)
previous_week <- previous_info$week previous_week <- previous_info$week
previous_year <- previous_info$year previous_year <- previous_info$year
message(paste("Week:", current_week, "/ Year (ISO):", year)) message(paste("Week:", current_week, "/ Year (ISO):", year))
# Find mosaic files - support both tile-based AND single-file approaches # Find per-field weekly mosaics
message("Finding mosaic files...") message("Finding per-field weekly mosaics...")
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year)
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year) single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
# PRIORITY 1: Check for tile-based mosaics (projects with large ROI) if (!dir.exists(weekly_mosaic)) {
detected_grid_size <- NA stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
mosaic_dir <- NA "\nScript 40 (mosaic creation) must be run first."))
mosaic_mode <- NA }
if (dir.exists(weekly_tile_max)) { field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) field_dirs <- field_dirs[field_dirs != ""]
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) { if (length(field_dirs) == 0) {
detected_grid_size <- grid_patterns[1] stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size) "\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) }
if (length(tile_files) > 0) { # Verify we have mosaics for this week
message(paste(" ✓ Using tile-based approach (grid-size:", detected_grid_size, ")")) single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
message(paste(" Found", length(tile_files), "tiles")) per_field_files <- c()
mosaic_mode <- "tiled" for (field in field_dirs) {
} field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
} }
} }
# PRIORITY 2: Check for per-field mosaics (NEW per-field architecture) if (length(per_field_files) == 0) {
if (is.na(mosaic_mode)) { stop(paste("ERROR: No mosaics found for week", current_week, "year", year,
message(" No tiles found. Checking for per-field mosaics...") "\nExpected pattern:", single_file_pattern,
# Check if weekly_mosaic has field subdirectories "\nChecked:", weekly_mosaic))
if (dir.exists(weekly_mosaic)) {
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
if (length(field_dirs) > 0) {
# Check if any field has the week pattern we're looking for
per_field_files <- c()
for (field in field_dirs) {
field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
}
}
if (length(per_field_files) > 0) {
message(paste(" ✓ Using per-field mosaic approach"))
message(paste(" Found", length(per_field_files), "per-field mosaics"))
mosaic_mode <- "per-field"
mosaic_dir <- weekly_mosaic # Will be field subdirectories
}
}
}
} }
# PRIORITY 3: Fall back to single-file mosaic (legacy approach) message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
if (is.na(mosaic_mode)) {
message(" No per-field mosaics found. Checking for single-file mosaic (legacy approach)...")
mosaic_dir <- weekly_mosaic
single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(single_file) > 0) {
message(paste(" ✓ Using single-file approach"))
message(paste(" Found 1 mosaic file:", basename(single_file[1])))
mosaic_mode <- "single-file"
} else {
stop(paste("ERROR: No mosaic files found for week", current_week, year,
"\n Checked (1) tile-based:", file.path(weekly_tile_max, "*", "week_*.tif"),
"\n Checked (2) per-field:", file.path(weekly_mosaic, "*", "week_*.tif"),
"\n Checked (3) single-file:", file.path(weekly_mosaic, "week_*.tif")))
}
}
message(paste(" Using mosaic mode:", mosaic_mode)) mosaic_mode <- "per-field"
mosaic_dir <- weekly_mosaic
# Load field boundaries # Load field boundaries
tryCatch({ tryCatch({
@ -551,44 +516,15 @@ main <- function() {
) )
} }
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile # Build per-field configuration
# ============================================================================
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
# ============================================================================
# Build tile grid (needed by calculate_field_statistics)
message("\nPreparing mosaic configuration for statistics calculation...") message("\nPreparing mosaic configuration for statistics calculation...")
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
# For tile-based mosaics: build the grid mapping # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
# For single-file: create a minimal grid structure (single "tile" = entire mosaic) field_grid <- list(
if (mosaic_mode == "tiled") { mosaic_dir = mosaic_dir,
tile_grid <- build_tile_grid(mosaic_dir, current_week, year) mode = "per-field"
message(paste(" ✓ Built tile grid with", nrow(tile_grid), "tiles")) )
} else {
# Single-file mode: create a minimal grid with just the single mosaic
message(" ✓ Using single-file mosaic (no tile grid needed)")
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(single_file) == 0) {
stop("ERROR: Single-file mosaic not found in", mosaic_dir)
}
# Create a minimal tile_grid structure with one "tile" representing the entire mosaic
tile_grid <- list(
mosaic_dir = mosaic_dir,
data = data.frame(
id = 0, # Single tile ID = 0 (full extent)
xmin = NA_real_,
xmax = NA_real_,
ymin = NA_real_,
ymax = NA_real_,
stringsAsFactors = FALSE
),
mode = "single-file",
file = single_file[1]
)
}
message("\nUsing modular RDS-based approach for weekly statistics...") message("\nUsing modular RDS-based approach for weekly statistics...")
@ -599,7 +535,7 @@ main <- function() {
year = year, year = year,
project_dir = project_dir, project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf, field_boundaries_sf = field_boundaries_sf,
mosaic_dir = tile_grid$mosaic_dir, mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir, reports_dir = reports_dir,
report_date = end_date report_date = end_date
) )
@ -617,7 +553,7 @@ main <- function() {
year = previous_year, year = previous_year,
project_dir = project_dir, project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf, field_boundaries_sf = field_boundaries_sf,
mosaic_dir = tile_grid$mosaic_dir, mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir, reports_dir = reports_dir,
report_date = prev_report_date report_date = prev_report_date
) )

View file

@ -660,85 +660,101 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
message(paste("Calculating statistics for all fields - Week", week_num, year)) message(paste("Calculating statistics for all fields - Week", week_num, year))
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) # Per-field mode: look in per-field subdirectories
single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year) single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
if (length(tile_files) == 0) { # Find all field subdirectories with mosaics for this week
single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
if (length(single_file) > 0) { field_dirs <- field_dirs[field_dirs != ""]
message(paste(" Using single-file mosaic for week", week_num))
tile_files <- single_file[1] per_field_files <- list()
} else { for (field in field_dirs) {
stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir)) field_mosaic_dir <- file.path(mosaic_dir, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files[[field]] <- files[1] # Take first match for this field
} }
} }
message(paste(" Found", length(tile_files), "mosaic file(s) for week", week_num)) if (length(per_field_files) == 0) {
stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir))
}
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
results_list <- list() results_list <- list()
for (tile_idx in seq_along(tile_files)) { # Process each field's mosaic
tile_file <- tile_files[tile_idx] for (field_idx in seq_along(per_field_files)) {
field_name <- names(per_field_files)[field_idx]
field_file <- per_field_files[[field_name]]
tryCatch({ tryCatch({
current_rast <- terra::rast(tile_file) current_rast <- terra::rast(field_file)
ci_band <- current_rast[["CI"]] ci_band <- current_rast[["CI"]]
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) message(paste(" [SKIP] Field", field_name, "- CI band not found"))
return(NULL) next
} }
extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) # Extract CI values for this field
unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) field_boundary <- field_boundaries_sf[field_boundaries_sf$field == field_name, ]
for (field_poly_idx in unique_field_ids) { if (nrow(field_boundary) == 0) {
field_id <- field_boundaries_sf$field[field_poly_idx] message(paste(" [SKIP] Field", field_name, "- not in field boundaries"))
ci_vals <- extracted$CI[extracted$ID == field_poly_idx] next
ci_vals <- ci_vals[!is.na(ci_vals)]
if (length(ci_vals) == 0) next
mean_ci <- mean(ci_vals, na.rm = TRUE)
ci_std <- sd(ci_vals, na.rm = TRUE)
cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
range_min <- min(ci_vals, na.rm = TRUE)
range_max <- max(ci_vals, na.rm = TRUE)
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
ci_percentiles_str <- get_ci_percentiles(ci_vals)
GERMINATION_CI_THRESHOLD <- 2.0
num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE)
num_pixels_total <- length(ci_vals)
pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
field_rows <- extracted[extracted$ID == field_poly_idx, ]
num_total <- nrow(field_rows)
num_data <- sum(!is.na(field_rows$CI))
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
cloud_cat <- if (num_data == 0) "No image available"
else if (pct_clear >= 95) "Clear view"
else "Partial coverage"
existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
if (length(existing_idx) > 0) next
results_list[[length(results_list) + 1]] <- data.frame(
Field_id = field_id,
Mean_CI = round(mean_ci, 2),
CV = round(cv * 100, 2),
CI_range = range_str,
CI_Percentiles = ci_percentiles_str,
Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
Cloud_pct_clear = pct_clear,
Cloud_category = cloud_cat,
stringsAsFactors = FALSE
)
} }
message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE)
if (nrow(extracted) == 0 || all(is.na(extracted$CI))) {
message(paste(" [SKIP] Field", field_name, "- no CI values found"))
next
}
ci_vals <- extracted$CI[!is.na(extracted$CI)]
if (length(ci_vals) == 0) {
next
}
# Calculate statistics
mean_ci <- mean(ci_vals, na.rm = TRUE)
ci_std <- sd(ci_vals, na.rm = TRUE)
cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
range_min <- min(ci_vals, na.rm = TRUE)
range_max <- max(ci_vals, na.rm = TRUE)
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
ci_percentiles_str <- get_ci_percentiles(ci_vals)
num_pixels_total <- length(ci_vals)
num_pixels_gte_2 <- sum(ci_vals >= 2)
pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
num_total <- nrow(extracted)
num_data <- sum(!is.na(extracted$CI))
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
cloud_cat <- if (num_data == 0) "No image available"
else if (pct_clear >= 95) "Clear view"
else "Partial coverage"
# Add to results
results_list[[length(results_list) + 1]] <- data.frame(
Field_id = field_name,
Mean_CI = round(mean_ci, 2),
CV = round(cv * 100, 2),
CI_range = range_str,
CI_Percentiles = ci_percentiles_str,
Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
Cloud_pct_clear = pct_clear,
Cloud_category = cloud_cat,
stringsAsFactors = FALSE
)
message(paste(" Field", field_idx, "of", length(per_field_files), "processed"))
}, error = function(e) { }, error = function(e) {
message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) message(paste(" [ERROR] Field", field_name, ":", e$message))
}) })
} }

View file

@ -3,7 +3,7 @@ params:
ref: "word-styles-reference-var1.docx" ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx output_file: CI_report.docx
report_date: "2025-09-30" report_date: "2025-09-30"
data_dir: "aura" data_dir: "angata"
mail_day: "Wednesday" mail_day: "Wednesday"
borders: FALSE borders: FALSE
ci_plot_type: "both" # options: "absolute", "cumulative", "both" ci_plot_type: "both" # options: "absolute", "cumulative", "both"
@ -107,8 +107,9 @@ project_dir <- params$data_dir
# Source project parameters with error handling # Source project parameters with error handling
tryCatch({ tryCatch({
source(here::here("r_app", "parameters_project.R")) source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "00_common_utils.R"))
}, error = function(e) { }, error = function(e) {
stop("Error loading parameters_project.R: ", e$message) stop("Error loading project utilities: ", e$message)
}) })
# Load centralized paths # Load centralized paths
@ -363,7 +364,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
# Load CI quadrant data for field-level analysis # Load CI quadrant data for field-level analysis
tryCatch({ tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data") safe_log("Successfully loaded CI quadrant data")
}, error = function(e) { }, error = function(e) {
stop("Error loading CI quadrant data: ", e$message) stop("Error loading CI quadrant data: ", e$message)
@ -840,7 +841,7 @@ The following table provides a comprehensive overview of all monitored fields wi
```{r detailed_field_table, echo=FALSE, results='asis'} ```{r detailed_field_table, echo=FALSE, results='asis'}
# Load CI quadrant data to get field ages # Load CI quadrant data to get field ages
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
# Identify the current season for each field based on report_date # Identify the current season for each field based on report_date
# The current season is the one where the report_date falls within or shortly after the season # The current season is the one where the report_date falls within or shortly after the season

View file

@ -3,7 +3,7 @@ params:
ref: "word-styles-reference-var1.docx" ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx output_file: CI_report.docx
report_date: "2025-09-30" report_date: "2025-09-30"
data_dir: "aura" data_dir: "angata"
mail_day: "Wednesday" mail_day: "Wednesday"
borders: FALSE borders: FALSE
ci_plot_type: "both" # options: "absolute", "cumulative", "both" ci_plot_type: "both" # options: "absolute", "cumulative", "both"
@ -110,6 +110,13 @@ tryCatch({
stop("Error loading parameters_project.R: ", e$message) stop("Error loading parameters_project.R: ", e$message)
}) })
# Source common utilities for logging and helper functions
tryCatch({
source(here::here("r_app", "00_common_utils.R"))
}, error = function(e) {
stop("Error loading 00_common_utils.R: ", e$message)
})
# Load centralized paths # Load centralized paths
paths <- setup_project_directories(project_dir) paths <- setup_project_directories(project_dir)
@ -480,7 +487,7 @@ safe_log(paste("Week range:", week_start, "to", week_end))
```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
# Load CI index data with error handling # Load CI index data with error handling
tryCatch({ tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) CI_quadrant <- readRDS(here::here(paths$cumulative_ci_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data") safe_log("Successfully loaded CI quadrant data")
}, error = function(e) { }, error = function(e) {

View file

@ -301,6 +301,9 @@ load_field_boundaries <- function(data_dir) {
tryCatch({ tryCatch({
boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE) boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE)
# Filter out features with empty geometries
boundaries_sf <- boundaries_sf[!st_is_empty(boundaries_sf), ]
# Repair geometries if needed # Repair geometries if needed
if (!all(sf::st_is_valid(boundaries_sf))) { if (!all(sf::st_is_valid(boundaries_sf))) {
boundaries_sf <- sf::st_make_valid(boundaries_sf) boundaries_sf <- sf::st_make_valid(boundaries_sf)

File diff suppressed because it is too large Load diff