# SmartCane/r_app/run_full_pipeline.R
# (file listing metadata: 939 lines, 39 KiB, R)
# ==============================================================================
# FULL PIPELINE RUNNER
# ==============================================================================
# Mixed Python/R pipeline:
# 1. Python: Download Planet images
# 2. R 10: Create master grid and split TIFFs
# 3. R 20: CI Extraction
# 4. R 21: Convert CI RDS to CSV
# 5. R 30: Interpolate growth model
# 6. Python 31: Harvest imminent weekly
# 7. R 40: Mosaic creation
# 8. R 80: Calculate KPIs
#
# ==============================================================================
# HOW TO RUN THIS SCRIPT
# ==============================================================================
#
# Run from the smartcane/ directory:
#
# Option 1 (Recommended - shows real-time output):
# Rscript r_app/run_full_pipeline.R
#
# Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
#
# Option 3 (Batch mode - output saved to .Rout file):
# R CMD BATCH --vanilla r_app/run_full_pipeline.R
#
# ==============================================================================
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba"
data_source <- "merged_tif" # Standard data source directory
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
# ***************************
# Rscript path for running external R scripts via system().
# Prefer the currently running R installation (R.home) over a hard-coded
# version-specific path, so upgrading R does not break the pipeline.
# Fall back to the legacy hard-coded location for backward compatibility.
RSCRIPT_PATH <- file.path(R.home("bin"), "Rscript.exe")
if (!file.exists(RSCRIPT_PATH)) {
  RSCRIPT_PATH <- file.path("C:", "Program Files", "R", "R-4.4.3", "bin", "x64", "Rscript.exe")
}
# Load client type mapping and centralized paths from parameters_project.R
source("r_app/parameters_project.R")
source("r_app/00_common_utils.R")
paths <- setup_project_directories(project_dir)
client_type <- get_client_type(project_dir)
cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type))
# ==============================================================================
# DETERMINE REPORTING WINDOW (auto-calculated based on KPI requirements)
# ==============================================================================
# Script 80 (KPIs) needs an 8-week history for its trend analysis, so the
# reporting window is fixed at 8 ISO weeks ending on end_date.
reporting_weeks_needed <- 8 # CRITICAL: Script 80's 8-week trend analysis requirement
offset <- reporting_weeks_needed * 7 # Window length in days (8 weeks = 56 days)
cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset))
wwy_current <- get_iso_week_year(end_date)
cat(
  sprintf(" Running week: %02d / %d\n", wwy_current$week, wwy_current$year),
  sprintf(" Date range: %s to %s\n", format(end_date - offset, "%Y-%m-%d"), format(end_date, "%Y-%m-%d")),
  sep = ""
)
# Pre-formatted end date string (kept for later sections of the script)
end_date_str <- format(as.Date(end_date), "%Y-%m-%d")
# Overall pipeline status flag; any fatal step flips this to FALSE
pipeline_success <- TRUE
# ==============================================================================
# EARLY PREREQ CHECK: Verify mosaic requirements BEFORE any downloads
# ==============================================================================
# This determines if we need more weeks of data than the initial reporting window
# Run this BEFORE downloads so we can download ONLY missing dates upfront
cat("\n========== EARLY CHECK: MOSAIC REQUIREMENTS FOR REPORTING WINDOW ==========\n")
# Detect mosaic mode early (centralized function in parameters_project.R)
mosaic_mode <- detect_mosaic_mode(project_dir)
# Build the table of required weeks with a single bind instead of growing a
# data.frame via rbind() inside a loop (same rows, same order: newest first)
weeks_needed <- do.call(rbind, lapply(0:(reporting_weeks_needed - 1), function(weeks_back) {
  check_date <- end_date - (weeks_back * 7)
  wwy <- get_iso_week_year(check_date)
  data.frame(week = wwy$week, year = wwy$year, date = check_date)
}))
missing_weeks_dates <- c() # NOTE(review): not populated in this section - verify whether later parts of the file use it
earliest_missing_date <- end_date # Start with end_date, go back if needed
missing_weeks <- data.frame() # Track ALL missing weeks for later processing by Script 40
# Scan the reporting window for weeks whose mosaic output is missing.
# Missing weeks are queued for Script 40; earliest_missing_date is pulled back
# so downloads/preprocessing cover the whole gap.
missing_rows <- list() # Accumulate rows, bind once at the end (avoids rbind-in-loop growth)
for (i in seq_len(nrow(weeks_needed))) {
  week_num <- weeks_needed[i, "week"]
  year_num <- weeks_needed[i, "year"]
  check_date <- weeks_needed[i, "date"]
  # Pattern must be flexible to match both:
  # - Single-file: week_51_2025.tif (top-level)
  # - Single-file per-field: week_51_2025.tif (in {FIELD}/ subdirectories)
  # - Tiled: week_51_2025_01.tif, week_51_2025_02.tif, etc.
  week_pattern_check <- sprintf("week_%02d_%d", week_num, year_num)
  files_this_week <- character(0)
  if (mosaic_mode == "tiled") {
    mosaic_dir_check <- get_mosaic_dir(project_dir, mosaic_mode = "tiled")
    if (dir.exists(mosaic_dir_check)) {
      # Search recursively to support per-field mosaics in field subdirectories
      files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check, recursive = TRUE, full.names = FALSE)
    }
  } else if (mosaic_mode == "single-file") {
    mosaic_dir_check <- paths$weekly_mosaic_dir
    if (dir.exists(mosaic_dir_check)) {
      # Recursive search covers both top-level (legacy) and per-field subdirectories
      files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check, recursive = TRUE, full.names = FALSE)
    }
  }
  cat(sprintf(
    " Week %02d/%d (%s): %s\n", week_num, year_num, format(check_date, "%Y-%m-%d"),
    if (length(files_this_week) > 0) "✓ EXISTS" else "✗ MISSING"
  ))
  # If the week is missing, widen the download window and queue it for Script 40
  if (length(files_this_week) == 0) {
    week_start <- check_date - 6 # Monday of that week
    if (week_start < earliest_missing_date) {
      earliest_missing_date <- week_start
    }
    missing_rows[[length(missing_rows) + 1]] <- data.frame(week = week_num, year = year_num, week_end_date = check_date)
  }
}
if (length(missing_rows) > 0) {
  # Single bind; missing_weeks stays an empty data.frame when nothing is missing
  missing_weeks <- do.call(rbind, missing_rows)
}
# ------------------------------------------------------------------------------
# Choose the offset used by the data-generation scripts (10, 20, 30, 40).
# When any mosaic week is missing, shrink the window to just the gap between
# the earliest missing week and end_date. Script 80 keeps using the full
# reporting_weeks_needed offset for its KPI calculations.
# ------------------------------------------------------------------------------
force_data_generation <- earliest_missing_date < end_date
if (force_data_generation) {
  gap_start <- format(earliest_missing_date, "%Y-%m-%d")
  cat(sprintf("\n[INFO] Missing week(s) detected - need to fill from %s onwards\n", gap_start))
  # Cover only the gap (from earliest missing week to end_date)
  dynamic_offset <- as.numeric(end_date - earliest_missing_date)
  cat(sprintf(
    "[INFO] Will download/process ONLY missing dates: %d days (from %s to %s)\n",
    dynamic_offset, gap_start, format(end_date, "%Y-%m-%d")
  ))
  data_generation_offset <- dynamic_offset
} else {
  cat("\n[INFO] ✓ All required mosaics exist - using normal reporting window\n")
  data_generation_offset <- offset # Default reporting window offset
}
# ==============================================================================
# CHECK KPI REQUIREMENTS FOR REPORTING WINDOW
# ==============================================================================
# Scripts 90 (Word) and 91 (Excel) need KPIs for the full reporting window.
# Script 80 ALWAYS runs and calculates any missing KPIs, so this section is
# informational only. Uses the centralized check_kpi_completeness() helper.
cat("\n========== KPI REQUIREMENT CHECK ==========\n")
cat(sprintf(
  "KPIs needed for reporting: %d weeks (current week + %d weeks history)\n",
  reporting_weeks_needed, reporting_weeks_needed - 1
))
# Centralized completeness check (parameters_project.R)
kpi_check <- check_kpi_completeness(project_dir, client_type, end_date, reporting_weeks_needed)
kpi_dir <- kpi_check$kpi_dir
kpis_needed <- kpi_check$kpis_df
kpis_missing_count <- kpi_check$missing_count
# Ensure the KPI output directory exists before anything writes into it
if (!dir.exists(kpi_dir)) {
  dir.create(kpi_dir, recursive = TRUE, showWarnings = FALSE)
}
# Per-week status report
if (nrow(kpis_needed) == 0) {
  cat(" (No weeks in reporting window)\n")
} else {
  for (i in seq_len(nrow(kpis_needed))) {
    row <- kpis_needed[i, ]
    status <- if (row$has_kpis) "✓ EXISTS" else "✗ WILL BE CALCULATED"
    cat(sprintf(
      " Week %02d/%d (%s): %s (%d files)\n",
      row$week, row$year, format(row$date, "%Y-%m-%d"), status, row$file_count
    ))
  }
}
cat(sprintf(
  "\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n",
  nrow(kpis_needed) - kpis_missing_count, nrow(kpis_needed), kpis_missing_count
))
# ------------------------------------------------------------------------------
# Client-type gating for conditional scripts.
# - "cane_supply": Scripts 20,21,22,23,30,31,80,91 (full pipeline, Excel output)
# - "agronomic_support": Scripts 20,30,80,90 (KPI calculation + Word report)
#
# Scripts that ALWAYS run regardless of client type:
# 00 (Python download), 10 (tiling, if outputs missing), 20 (CI extraction),
# 30 (growth model), 40 (mosaic creation), 80 (KPI calculation).
#
# Client-type-specific scripts:
# 21/22/23/31 - cane_supply only; 90 - agronomic_support only; 91 - cane_supply only.
# ------------------------------------------------------------------------------
is_cane_supply <- client_type == "cane_supply"
skip_cane_supply_only <- !is_cane_supply # Gates Scripts 21, 22, 23, 31
run_legacy_report <- client_type == "agronomic_support" # Script 90
run_modern_report <- is_cane_supply # Script 91
# ==============================================================================
# INTELLIGENT CHECKING: What has already been completed?
# ==============================================================================
cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")
# Use centralized mosaic mode detection from parameters_project.R
cat(sprintf("Auto-detected mosaic mode: %s\n", mosaic_mode))
# Check Script 10 outputs - FLEXIBLE: look for tiles either directly OR in grid subdirs
tiles_split_base <- paths$daily_tiles_split_dir
tiles_dates <- character(0)
if (dir.exists(tiles_split_base)) {
  # Try grid-size subdirectories first (5x5, 10x10, etc.) - preferred new structure
  subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
  if (length(grid_patterns) > 0) {
    # New structure: daily_tiles_split/{grid_size}/{dates}/
    grid_dir <- file.path(tiles_split_base, grid_patterns[1])
    tiles_dates <- list.dirs(grid_dir, full.names = FALSE, recursive = FALSE)
  } else {
    # Old structure: daily_tiles_split/{dates}/ (no grid-size subfolder)
    tiles_dates <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
  }
}
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
# Check Script 20 outputs (CI extraction) - daily RDS files
ci_daily_dir <- paths$daily_ci_vals_dir
ci_files <- if (dir.exists(ci_daily_dir)) {
  list.files(ci_daily_dir, pattern = "\\.rds$")
} else {
  character(0)
}
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
# Script 21 output (CSV) is overwritten on every run, so it is not a useful
# skip indicator; it simply runs whenever Script 20 runs.
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
# Check Script 40 outputs (mosaics): the early-check section already identified
# missing_weeks. NOTE: skip_40 is decided in the "skipping decisions" section
# below (the identical assignment that used to live here was dead code - it was
# overwritten before its first read).
cat(sprintf("Script 40: %d missing week(s) to create\n", nrow(missing_weeks)))
# Check Script 80 outputs (KPIs in reports/kpis/{field_level|field_analysis})
# kpi_dir already set by check_kpi_completeness() above
# Script 80 exports to .xlsx (Excel) and .rds (RDS) formats
kpi_files <- if (dir.exists(kpi_dir)) {
  list.files(kpi_dir, pattern = "\\.xlsx$|\\.rds$")
} else {
  character(0)
}
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
# Determine if scripts should run based on outputs AND client type
skip_10 <- (length(tiles_dates) > 0 && !force_rerun && !force_data_generation) # Force Script 10 if missing weeks detected
skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data
skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion)
skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients
skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients
skip_30 <- FALSE # Script 30 ALWAYS runs for all client types
skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients
skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Skip Script 40 only if NO missing weeks
skip_80 <- (kpis_missing_count == 0 && !force_rerun) # Skip Script 80 only if ALL KPIs exist AND not forcing rerun
cat("\nSkipping decisions (based on outputs AND client type):\n")
cat(sprintf(" Script 10: %s\n", if (skip_10) "SKIP" else "RUN"))
cat(sprintf(" Script 20: RUN (always runs to process new downloads)\n"))
# BUGFIX: the Script 21 annotation used condition (skip_cane_supply_only && !skip_21),
# which is always FALSE because skip_21 == skip_cane_supply_only - the
# "(non-cane_supply client)" note was never printed. Now consistent with 22/23/31.
cat(sprintf(" Script 21: %s %s\n", if (skip_21) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 22: %s %s\n", if (skip_22) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 23: %s %s\n", if (skip_23) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 30: %s (always runs)\n", if (skip_30) "SKIP" else "RUN"))
cat(sprintf(" Script 31: %s %s\n", if (skip_31) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 40: %s (looping through %d missing weeks)\n", if (skip_40) "SKIP" else "RUN", nrow(missing_weeks)))
cat(sprintf(" Script 80: %s (always runs)\n", if (skip_80) "SKIP" else "RUN"))
cat(sprintf(" Script 90: %s %s\n", if (!run_legacy_report) "SKIP" else "RUN", if (run_legacy_report) "(agronomic_support legacy report)" else ""))
cat(sprintf(" Script 91: %s %s\n", if (!run_modern_report) "SKIP" else "RUN", if (run_modern_report) "(cane_supply modern report)" else ""))
# ==============================================================================
# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY)
# ==============================================================================
# Diffs the target date window against TIFFs already present in merged_tif/ and
# shells out to the Python downloader once per missing date. Side effects:
# temporarily changes the working directory to python_app/ during the download
# loop, and flips skip_10 to FALSE when at least one download succeeded.
cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
tryCatch(
  {
    # Setup paths
    # NOTE: All downloads go to merged_tif/ regardless of project
    # (data_source variable is used later by Script 20 for reading, but downloads always go to merged_tif)
    merged_tifs_dir <- paths$merged_tif_folder # Always check merged_tif for downloads
    cat(sprintf("[DEBUG] Checking for existing files in: %s\n", merged_tifs_dir))
    cat(sprintf("[DEBUG] Directory exists: %s\n", dir.exists(merged_tifs_dir)))
    # Get existing dates from raw TIFFs in merged_tif/ (files named YYYY-MM-DD.tif)
    existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
    existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
    cat(sprintf("[DEBUG] Found %d existing TIFF files\n", length(existing_tiff_files)))
    if (length(existing_tiff_files) > 0) {
      cat(sprintf("[DEBUG] Sample files: %s\n", paste(head(existing_tiff_files, 3), collapse=", ")))
    }
    # Find missing dates in the window; data_generation_offset may be the
    # widened gap computed by the early mosaic check above
    start_date <- end_date - data_generation_offset
    date_seq <- seq(start_date, end_date, by = "day")
    target_dates <- format(date_seq, "%Y-%m-%d")
    # Get existing dates from tiles (better indicator of completion for tiled projects)
    existing_tile_dates <- tiles_dates
    # CRITICAL FIX: Always use TIFF dates for checking existing files
    # This is the source of truth - if merged_tif/ has a file, don't re-download it
    # We don't download again if the file exists, regardless of whether tiles have been created yet
    if (length(existing_tiff_dates) > 0) {
      cat(sprintf("[DEBUG] Using TIFF dates for existence check (found %d existing files)\n", length(existing_tiff_dates)))
      # IMPORTANT: Only consider existing TIFF dates that fall within our target window
      # This prevents old 2025 data from masking missing 2026 data
      existing_tile_dates <- existing_tiff_dates[existing_tiff_dates %in% target_dates]
    }
    # Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file)
    missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
    if (mosaic_mode == "single-file") {
      cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates)))
    } else {
      cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
    }
    cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
    # Download each missing date via the Python downloader; tally results
    download_count <- 0
    download_failed <- 0
    if (length(missing_dates) > 0) {
      # Save current directory
      original_dir <- getwd()
      # Change to python_app directory so relative paths work correctly
      setwd("python_app")
      for (date_str in missing_dates) {
        cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str)
        result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)
        if (result == 0) {
          download_count <- download_count + 1
        } else {
          download_failed <- download_failed + 1
        }
      }
      # Change back to original directory
      setwd(original_dir)
    }
    cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed))
    if (download_failed > 0) {
      # Best-effort: failed downloads are reported but do not abort the pipeline
      cat("⚠ Some downloads failed, but continuing pipeline\n")
    }
    # Force Script 10 to run ONLY if downloads actually succeeded (not just attempted)
    if (download_count > 0) {
      skip_10 <- FALSE
    }
  },
  error = function(e) {
    # NOTE(review): if an error fires between setwd("python_app") and the
    # restore above, the working directory stays changed - consider on.exit()
    cat("✗ Error in planet download:", e$message, "\n")
    pipeline_success <<- FALSE
  }
)
# ==============================================================================
# SCRIPT 10: CREATE PER-FIELD TIFFs
# ==============================================================================
if (pipeline_success && !skip_10) {
  cat("\n========== RUNNING SCRIPT 10: CREATE PER-FIELD TIFFs ==========\n")
  tryCatch(
    {
      # Run Script 10 via system() - NEW per-field version
      # Arguments: project_dir
      cmd <- sprintf(
        '"%s" r_app/10_create_per_field_tiffs.R "%s"',
        RSCRIPT_PATH,
        project_dir
      )
      result <- system(cmd)
      if (result != 0) {
        # sprintf keeps the exit code separated from the label (stop() would
        # otherwise concatenate the arguments without a space)
        stop(sprintf("Script 10 exited with error code: %d", result))
      }
      # Verify output - check per-field structure
      field_tiles_dir <- paths$field_tiles_dir
      if (dir.exists(field_tiles_dir)) {
        fields <- list.dirs(field_tiles_dir, full.names = FALSE, recursive = FALSE)
        fields <- fields[fields != ""]
        # vapply (not sapply) guarantees an integer result even with zero
        # field directories (sum(sapply(character(0), ...)) errors on a list)
        total_files <- sum(vapply(
          file.path(field_tiles_dir, fields),
          function(f) length(list.files(f, pattern = "\\.tif$")),
          integer(1)
        ))
        cat(sprintf("✓ Script 10 completed - created per-field TIFFs (%d fields, %d files)\n", length(fields), total_files))
      } else {
        cat("✓ Script 10 completed\n")
      }
    },
    error = function(e) {
      cat("✗ Error in Script 10:", e$message, "\n")
      pipeline_success <<- FALSE
    }
  )
} else if (skip_10) {
  cat("\n========== SKIPPING SCRIPT 10 (per-field TIFFs already exist) ==========\n")
}
# ==============================================================================
# CHECK: Per-Field TIFFs Without CI Data
# ==============================================================================
# IMPORTANT: Script 10 creates per-field TIFFs for ALL dates in merged_tif/,
# but Script 20 only processes dates within the offset window. This check finds
# dates that have per-field TIFFs but NO CI data, and forces Script 20 to
# process them regardless of offset.
cat("\n========== CHECKING FOR PER-FIELD TIFFs WITHOUT CI DATA ==========\n")
field_tiles_dir <- paths$field_tiles_dir
field_tiles_ci_dir <- paths$field_tiles_ci_dir
ci_daily_dir <- paths$daily_ci_vals_dir
# Collect the unique date strings found in files across the field
# subdirectories of base_dir. extract_date maps a filename vector to date
# strings (e.g. strips "_suffix.tif" or ".rds"). Returns character(0) when
# base_dir does not exist. Replaces three copies of the same scan loop.
collect_field_dates <- function(base_dir, file_pattern, extract_date) {
  dates <- character(0)
  if (!dir.exists(base_dir)) {
    return(dates)
  }
  fields <- list.dirs(base_dir, full.names = FALSE, recursive = FALSE)
  fields <- fields[fields != ""]
  for (field in fields) {
    files <- list.files(file.path(base_dir, field), pattern = file_pattern)
    dates <- unique(c(dates, extract_date(files)))
  }
  dates
}
# Dates that have per-field TIFFs (filenames like YYYY-MM-DD_*.tif)
tiff_dates_all <- collect_field_dates(
  field_tiles_dir, "^\\d{4}-\\d{2}-\\d{2}.*\\.tif$",
  function(files) sub("_.*$", "", files)
)
# Dates that have CI data: CI TIFFs in field_tiles_CI ...
ci_dates_all <- collect_field_dates(
  field_tiles_ci_dir, "^\\d{4}-\\d{2}-\\d{2}.*\\.tif$",
  function(files) sub("_.*$", "", files)
)
# ... plus extracted_ci RDS files (source of truth)
ci_dates_all <- unique(c(ci_dates_all, collect_field_dates(
  ci_daily_dir, "^\\d{4}-\\d{2}-\\d{2}\\.rds$",
  function(files) sub("\\.rds$", "", files)
)))
# Find dates with TIFFs but no CI data
dates_missing_ci <- setdiff(tiff_dates_all, ci_dates_all)
cat(sprintf("Total per-field TIFF dates: %d\n", length(tiff_dates_all)))
cat(sprintf("Total CI data dates: %d\n", length(ci_dates_all)))
cat(sprintf("Dates with TIFFs but NO CI: %d\n", length(dates_missing_ci)))
# If there are per-field TIFFs without CI, force Script 20 to run with extended date range
if (length(dates_missing_ci) > 0) {
  cat("\n⚠ Found per-field TIFFs without CI data - forcing Script 20 to process them\n")
  cat(sprintf(" Sample missing dates: %s\n", paste(head(dates_missing_ci, 3), collapse=", ")))
  # Extend the window back to the earliest date that still needs CI extraction
  earliest_missing_tiff <- min(as.Date(dates_missing_ci))
  extended_offset <- as.numeric(end_date - earliest_missing_tiff)
  cat(sprintf(" Extended offset: %d days (from %s to %s)\n",
    extended_offset, format(earliest_missing_tiff, "%Y-%m-%d"), format(end_date, "%Y-%m-%d")))
  # Use extended offset for Script 20
  offset_for_ci <- extended_offset
  skip_20 <- FALSE # Force Script 20 to run
} else {
  cat("✓ All per-field TIFFs have corresponding CI data\n")
  offset_for_ci <- offset # Use normal offset
}
# ==============================================================================
# SCRIPT 20: CI EXTRACTION
# ==============================================================================
if (pipeline_success && !skip_20) {
  cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n")
  tryCatch(
    {
      # Run Script 20 via system() to pass command-line args just like from terminal
      # Arguments: project_dir end_date offset
      # offset_for_ci may have been extended above when per-field TIFFs lacked CI
      cmd <- sprintf(
        '"%s" r_app/20_ci_extraction_per_field.R "%s" "%s" %d',
        RSCRIPT_PATH,
        project_dir, format(end_date, "%Y-%m-%d"), offset_for_ci
      )
      result <- system(cmd)
      if (result != 0) {
        # sprintf inserts the missing space between the label and the exit code
        stop(sprintf("Script 20 exited with error code: %d", result))
      }
      # Verify CI output was created
      ci_daily_dir <- paths$daily_ci_vals_dir
      if (dir.exists(ci_daily_dir)) {
        files <- list.files(ci_daily_dir, pattern = "\\.rds$")
        cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files)))
      } else {
        cat("✓ Script 20 completed\n")
      }
    },
    error = function(e) {
      cat("✗ Error in Script 20:", e$message, "\n")
      pipeline_success <<- FALSE
    }
  )
} else if (skip_20) {
  cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) ==========\n")
}
# ==============================================================================
# SCRIPT 21: CONVERT CI RDS TO CSV
# ==============================================================================
# Unlike the Rscript-based steps, Script 21 is source()d into this session and
# reads end_date/offset/project_dir from the global environment, so those are
# assigned there first (Script 21's expected contract).
if (pipeline_success && !skip_21) {
  cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
  tryCatch(
    {
      # Pass parameters through the global environment for the sourced script
      assign("end_date", end_date, envir = .GlobalEnv)
      assign("offset", offset, envir = .GlobalEnv)
      assign("project_dir", project_dir, envir = .GlobalEnv)
      source("r_app/21_convert_ci_rds_to_csv.R")
      main() # Script 21 defines main(); call it to execute with the variables above
      # Verify CSV output was created
      ci_csv_path <- paths$ci_for_python_dir
      if (dir.exists(ci_csv_path)) {
        csv_files <- list.files(ci_csv_path, pattern = "\\.csv$")
        cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files)))
      } else {
        cat("✓ Script 21 completed\n")
      }
    },
    error = function(e) {
      # Fatal: later sections check pipeline_success before running
      cat("✗ Error in Script 21:", e$message, "\n")
      pipeline_success <<- FALSE
    }
  )
} else if (skip_21) {
  cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
}
# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success && !skip_30) {
  cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
  tryCatch(
    {
      # Run Script 30 via system() to pass command-line args just like from terminal
      # Script 30 expects: project_dir only
      # Per-field version reads CI data from Script 20 per-field output location
      cmd <- sprintf(
        '"%s" r_app/30_interpolate_growth_model.R "%s"',
        RSCRIPT_PATH,
        project_dir
      )
      result <- system(cmd)
      if (result != 0) {
        # sprintf inserts the missing space between the label and the exit code
        stop(sprintf("Script 30 exited with error code: %d", result))
      }
      # Verify interpolated output - Script 30 saves to cumulative_ci_vals_dir
      cumulative_ci_vals_dir <- paths$cumulative_ci_vals_dir
      if (dir.exists(cumulative_ci_vals_dir)) {
        files <- list.files(cumulative_ci_vals_dir, pattern = "\\.rds$")
        cat(sprintf("✓ Script 30 completed - generated %d interpolated RDS file(s)\n", length(files)))
      } else {
        cat("✓ Script 30 completed\n")
      }
    },
    error = function(e) {
      cat("✗ Error in Script 30:", e$message, "\n")
      pipeline_success <<- FALSE
    }
  )
}
# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
if (pipeline_success && !skip_31) {
  cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n")
  tryCatch(
    {
      # Run Python script in pytorch_gpu conda environment
      # Script expects positional project name (not --project flag)
      # Run from smartcane root so conda can find the environment
      cmd <- sprintf("conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s", project_dir)
      result <- system(cmd)
      if (result == 0) {
        # Verify harvest output - check for THIS WEEK's specific file
        wwy_current_31 <- get_iso_week_year(end_date)
        harvest_exists <- check_harvest_output_exists(project_dir, wwy_current_31$week, wwy_current_31$year)
        if (harvest_exists) {
          cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", wwy_current_31$week))
        } else {
          cat("✓ Script 31 completed (check if harvest.xlsx is available)\n")
        }
      } else {
        cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n")
      }
    },
    error = function(e) {
      # BUGFIX: the handler previously called setwd(original_dir), but this
      # section never changes the working directory and original_dir only
      # exists if the earlier download loop happened to run - the handler
      # itself could error. Script 31 failures are non-fatal: warn and continue.
      cat("⚠ Script 31 error:", e$message, "\n")
    }
  )
} else if (skip_31) {
  cat("\n========== SKIPPING SCRIPT 31 (non-cane_supply client type) ==========\n")
}
# ==============================================================================
# SCRIPT 40: MOSAIC CREATION (LOOP THROUGH MISSING WEEKS)
# ==============================================================================
if (pipeline_success && !skip_40) {
  cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
  # If there are missing weeks, process them one at a time
  if (nrow(missing_weeks) > 0) {
    cat(sprintf("Found %d missing week(s) - running Script 40 once per week\n\n", nrow(missing_weeks)))
    # missing_weeks was built newest-first, so iterate backwards to process
    # the oldest week first (chronological order)
    for (week_idx in rev(seq_len(nrow(missing_weeks)))) {
      missing_week <- missing_weeks[week_idx, ]
      week_num <- missing_week$week
      year_num <- missing_week$year
      week_end_date <- as.Date(missing_week$week_end_date)
      cat(sprintf(
        "--- Creating mosaic for week %02d/%d (ending %s) ---\n",
        week_num, year_num, format(week_end_date, "%Y-%m-%d")
      ))
      tryCatch(
        {
          # Run Script 40 with offset=7 (one week only) for this specific week
          # The end_date is the last day of the week, and offset=7 covers the full 7-day week
          # Arguments: end_date offset project_dir
          cmd <- sprintf(
            '"%s" r_app/40_mosaic_creation_per_field.R "%s" 7 "%s"',
            RSCRIPT_PATH,
            format(week_end_date, "%Y-%m-%d"), project_dir
          )
          result <- system(cmd)
          if (result != 0) {
            # sprintf inserts the missing space between the label and exit code
            stop(sprintf("Script 40 exited with error code: %d", result))
          }
          # Verify mosaic was created for this specific week (centralized helper function)
          mosaic_check <- check_mosaic_exists(project_dir, week_num, year_num, mosaic_mode)
          if (mosaic_check$created) {
            cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num))
          } else {
            cat(sprintf("✓ Week %02d/%d processing completed (verify output)\n\n", week_num, year_num))
          }
        },
        error = function(e) {
          # BUGFIX: a stray extra "\n" argument to cat() used to print a
          # spurious " \n" after the message
          cat(sprintf("✗ Error creating mosaic for week %02d/%d: %s\n", week_num, year_num, e$message))
          pipeline_success <<- FALSE
        }
      )
    }
    if (pipeline_success) {
      cat(sprintf("✓ Script 40 completed - created all %d missing week mosaics\n", nrow(missing_weeks)))
    }
  } else {
    cat("No missing weeks detected - skipping Script 40\n")
    skip_40 <- TRUE
  }
} else if (skip_40) {
  cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n")
}
# ==============================================================================
# SCRIPT 80: CALCULATE KPIs (LOOP THROUGH REPORTING WINDOW)
# ==============================================================================
# Runs Script 80 once per week still lacking KPIs (per the earlier
# check_kpi_completeness() result), oldest week first.
if (pipeline_success && !skip_80) {
  cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs FOR REPORTING WINDOW ==========\n")
  # Build list of weeks that NEED calculation (missing KPIs)
  weeks_to_calculate <- kpis_needed[!kpis_needed$has_kpis, ] # Only weeks WITHOUT KPIs
  if (nrow(weeks_to_calculate) > 0) {
    # Sort by date (oldest to newest) for sequential processing
    weeks_to_calculate <- weeks_to_calculate[order(weeks_to_calculate$date), ]
    cat(sprintf(
      "Looping through %d missing week(s) in reporting window (from %s back to %s):\n\n",
      nrow(weeks_to_calculate),
      format(max(weeks_to_calculate$date), "%Y-%m-%d"),
      format(min(weeks_to_calculate$date), "%Y-%m-%d")
    ))
    tryCatch(
      {
        for (week_idx in 1:nrow(weeks_to_calculate)) {
          week_row <- weeks_to_calculate[week_idx, ]
          calc_date <- week_row$date
          # Run Script 80 for this specific week with offset=7 (one week only)
          # This ensures Script 80 calculates KPIs for THIS week with proper trend data
          cmd <- sprintf(
            '"%s" r_app/80_calculate_kpis.R "%s" "%s" %d',
            RSCRIPT_PATH,
            format(calc_date, "%Y-%m-%d"), project_dir, 7
          ) # offset=7 for single week
          cat(sprintf(
            " [Week %02d/%d] Running Script 80 with end_date=%s...\n",
            week_row$week, week_row$year, format(calc_date, "%Y-%m-%d")
          ))
          result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)
          # A failed week is reported but does not abort the remaining weeks
          if (result == 0) {
            cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year))
          } else {
            cat(sprintf(
              " ✗ Error calculating KPIs for week %02d/%d (exit code: %d)\n",
              week_row$week, week_row$year, result
            ))
          }
        }
        # Verify total KPI output (kpi_dir defined by check_kpi_completeness() earlier)
        if (dir.exists(kpi_dir)) {
          files <- list.files(kpi_dir, pattern = "\\.xlsx$|\\.rds$")
          # Extract subdir name from kpi_dir path for display
          subdir_name <- basename(kpi_dir)
          cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), subdir_name))
        } else {
          cat("\n✓ Script 80 loop completed\n")
        }
      },
      error = function(e) {
        cat("✗ Error in Script 80 loop:", e$message, "\n")
        pipeline_success <<- FALSE
      }
    )
  } else {
    cat(sprintf("✓ All %d weeks already have KPIs - skipping calculation\n", nrow(kpis_needed)))
  }
} else if (skip_80) {
  cat("\n========== SKIPPING SCRIPT 80 (all KPIs already exist) ==========\n")
}
# ==============================================================================
# VERIFY KPI COMPLETION AFTER SCRIPT 80
# ==============================================================================
# Recheck if all KPIs are now available (Script 80 should have calculated any missing ones)
cat("\n========== VERIFYING KPI COMPLETION ==========\n")
kpis_complete <- TRUE
if (dir.exists(kpi_dir)) {
  # seq_len(n) - 1 gives 0..n-1 and is empty (not c(0, -1)) when n is 0
  for (weeks_back in seq_len(reporting_weeks_needed) - 1) {
    check_date <- end_date - (weeks_back * 7)
    # %V / %G are the ISO-8601 week number and week-based year
    week_num <- as.numeric(format(check_date, "%V"))
    year_num <- as.numeric(format(check_date, "%G"))
    # Check for any KPI file from that week (flexible pattern to match all formats)
    # Matches: week_05_2026, AURA_KPI_week_05_2026, etc.
    week_pattern <- sprintf("_week_%02d_%d|week_%02d_%d", week_num, year_num, week_num, year_num)
    # NEW: Support per-field architecture - search recursively for KPI files in field subdirectories
    kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE)
    if (length(kpi_files_this_week) > 0) {
      cat(sprintf("  Week %02d/%d: ✓ KPIs found (%d files)\n", week_num, year_num, length(kpi_files_this_week)))
    } else {
      kpis_complete <- FALSE
      cat(sprintf("  Week %02d/%d: ✗ KPIs not found\n", week_num, year_num))
    }
  }
} else {
  # BUG FIX: previously a missing kpi_dir skipped the loop entirely and the
  # script still reported "All KPIs available". No directory means no KPIs.
  kpis_complete <- FALSE
  cat(sprintf("  ✗ KPI directory not found: %s\n", kpi_dir))
}
if (kpis_complete) {
  cat("✓ All KPIs available - full reporting window complete\n")
} else {
  cat("⚠ Note: Some KPIs may still be missing - Script 80 calculated what was available\n")
}
# ==============================================================================
# SCRIPT 90: LEGACY WORD REPORT (agronomic_support clients)
# ==============================================================================
if (pipeline_success && run_legacy_report) {
  cat("\n========== RUNNING SCRIPT 90: LEGACY WORD REPORT ==========\n")
  tryCatch(
    {
      # Script 90 is an RMarkdown document; render it straight into the
      # project's reports directory (already created by
      # setup_project_directories). File name carries the ISO week/year.
      report_dir <- paths$reports_dir
      iso_week <- as.numeric(format(end_date, "%V"))
      iso_year <- as.numeric(format(end_date, "%G"))
      report_name <- sprintf("CI_report_week%02d_%d.docx", iso_week, iso_year)
      # Compile the RMarkdown document to Word
      rmarkdown::render(
        input = "r_app/90_CI_report_with_kpis_simple.Rmd",
        output_dir = report_dir,
        output_file = report_name,
        params = list(
          report_date = format(end_date, "%Y-%m-%d"),
          data_dir = project_dir
        ),
        quiet = TRUE
      )
      cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", report_name))
    },
    error = function(e) {
      cat("✗ Error in Script 90:", e$message, "\n")
      pipeline_success <<- FALSE
    }
  )
} else if (run_legacy_report) {
  cat("\n========== SKIPPING SCRIPT 90 (pipeline error) ==========\n")
}
# ==============================================================================
# SCRIPT 91: MODERN WORD REPORT (cane_supply clients)
# ==============================================================================
if (pipeline_success && run_modern_report) {
  cat("\n========== RUNNING SCRIPT 91: MODERN WORD REPORT ==========\n")
  tryCatch(
    {
      # Script 91 is an RMarkdown document; render it straight into the
      # project's reports directory (already created by
      # setup_project_directories). File name carries the ISO week/year.
      report_dir <- paths$reports_dir
      iso_week <- as.numeric(format(end_date, "%V"))
      iso_year <- as.numeric(format(end_date, "%G"))
      report_name <- sprintf("CI_report_week%02d_%d.docx", iso_week, iso_year)
      # Compile the RMarkdown document to Word
      rmarkdown::render(
        input = "r_app/91_CI_report_with_kpis_Angata.Rmd",
        output_dir = report_dir,
        output_file = report_name,
        params = list(
          report_date = format(end_date, "%Y-%m-%d"),
          data_dir = project_dir
        ),
        quiet = TRUE
      )
      cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", report_name))
    },
    error = function(e) {
      cat("✗ Error in Script 91:", e$message, "\n")
      pipeline_success <<- FALSE
    }
  )
} else if (run_modern_report) {
  cat("\n========== SKIPPING SCRIPT 91 (pipeline error) ==========\n")
}
# ==============================================================================
# SUMMARY
# ==============================================================================
# Final run report: echo the key parameters and overall success/failure.
cat("\n========== PIPELINE COMPLETE ==========\n")
cat(sprintf("Project: %s\n", project_dir))
cat(sprintf("End Date: %s\n", end_date_str))
cat(sprintf("Offset: %d days\n", offset))
status_line <- if (pipeline_success) {
  "Status: ✓ All scripts completed successfully\n"
} else {
  "Status: ✗ Pipeline failed - check errors above\n"
}
cat(status_line)
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80 → R 90/91\n")