# ==============================================================================
# FULL PIPELINE RUNNER
# ==============================================================================
# Mixed Python/R pipeline:
#   1. Python:    Download Planet images
#   2. R 10:      Create master grid and split TIFFs
#   3. R 20:      CI Extraction
#   4. R 21:      Convert CI RDS to CSV
#   5. R 30:      Interpolate growth model
#   6. Python 31: Harvest imminent weekly
#   7. R 40:      Mosaic creation
#   8. R 80:      Calculate KPIs
#
# ==============================================================================
# HOW TO RUN THIS SCRIPT
# ==============================================================================
#
# Run from the smartcane/ directory:
#
# Option 1 (Recommended - shows real-time output):
#   Rscript r_app/run_full_pipeline.R
#
# Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces):
#   & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
#
# Option 3 (Batch mode - output saved to .Rout file):
#   R CMD BATCH --vanilla r_app/run_full_pipeline.R
#
# ==============================================================================

# ==============================================================================
# *** EDIT THESE VARIABLES ***

# Last date of the processing window; use Sys.Date() for "today",
# or pin a specific day: as.Date("2026-01-27").
end_date <- as.Date("2026-01-27")

# Number of days to look back from end_date.
offset <- 7

# Project name: "esa", "aura", "angata", "chemba".
project_dir <- "angata"

# angata keeps its 8-band imagery in a dedicated folder; every other
# project reads from the standard merged_tif directory.
if (project_dir == "angata") {
  data_source <- "merged_tif_8b"
} else {
  data_source <- "merged_tif"
}

# Set to TRUE to force all scripts to run even if outputs exist.
force_rerun <- FALSE
# ***************************

# Render the window end as a "%Y-%m-%d" string (end_date is already a Date;
# the as.Date() call is a harmless no-op on Date input).
end_date_str <- format(as.Date(end_date), "%Y-%m-%d")

# Flipped to FALSE by any failing stage; gates every downstream stage.
pipeline_success <- TRUE

# ==============================================================================
# INTELLIGENT CHECKING: What has already been completed?
# ==============================================================================

cat("\n========== CHECKING EXISTING OUTPUTS ==========\n")

# Check Script 10 outputs (tiled splits): one sub-directory per tiled date.
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir,
                       "daily_tiles_split", "5x5")
tiles_dates <- if (dir.exists(tiles_dir)) {
  list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
} else {
  character(0)  # type-stable empty vector (c() would yield NULL)
}
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))

# Check Script 20 outputs (CI extraction) - daily RDS files.
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir,
                          "Data", "extracted_ci", "daily_vals")
ci_files <- if (dir.exists(ci_daily_dir)) {
  list.files(ci_daily_dir, pattern = "\\.rds$")
} else {
  character(0)
}
cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))

# Check Script 21 outputs (CSV conversion) - note: this gets overwritten each
# time, so we don't skip based on it. The CI RDS files are the real signal:
# if Script 20 runs, 21 should also run.
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")

# Check Script 40 outputs (mosaics in weekly_tile_max/5x5).
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir,
                        "weekly_tile_max", "5x5")
mosaic_files <- if (dir.exists(mosaic_dir)) {
  list.files(mosaic_dir, pattern = "\\.tif$")
} else {
  character(0)
}
cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files)))

# Check Script 80 outputs (KPIs in reports/kpis/field_stats).
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir,
                     "reports", "kpis", "field_stats")
kpi_files <- if (dir.exists(kpi_dir)) {
  list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
} else {
  character(0)
}
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))

# Determine if scripts should run based on existing outputs.
skip_10 <- length(tiles_dates) > 0 && !force_rerun
skip_20 <- length(ci_files) > 0 && !force_rerun
skip_21 <- length(ci_files) > 0 && !force_rerun  # Skip 21 if 20 is skipped
skip_40 <- length(mosaic_files) > 0 && !force_rerun
# Always run Script 80 - it calculates KPIs for the current week (end_date),
# not historical weeks.
skip_80 <- FALSE
# NOTE(review): when fresh dates are downloaded below only skip_10 is reset;
# 20/21/40 stay skipped whenever any old outputs exist. Confirm those scripts
# pick up new dates incrementally, otherwise their skip flags should be reset
# here as well.

cat("\nSkipping decisions:\n")
cat(sprintf(" Script 10: %s\n", if (skip_10) "SKIP (tiles exist)" else "RUN"))
cat(sprintf(" Script 20: %s\n", if (skip_20) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 21: %s\n", if (skip_21) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 40: %s\n", if (skip_40) "SKIP (mosaics exist)" else "RUN"))
cat(sprintf(" Script 80: %s\n", if (skip_80) "SKIP (KPIs exist)" else "RUN"))

# ==============================================================================
# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY)
# ==============================================================================
cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n")
tryCatch({
  # Setup paths.
  base_path <- file.path("laravel_app", "storage", "app", project_dir)
  merged_tifs_dir <- file.path(base_path, data_source)

  # Get existing dates from raw TIFFs (files named YYYY-MM-DD.tif).
  existing_tiff_files <- list.files(merged_tifs_dir,
                                    pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
  existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)

  # Get existing dates from tiles (better indicator of completion).
  existing_tile_dates <- tiles_dates

  # Find missing dates in the window [end_date - offset, end_date].
  start_date <- end_date - offset
  date_seq <- seq(start_date, end_date, by = "day")
  target_dates <- format(date_seq, "%Y-%m-%d")

  # Only download if tiles don't exist yet (more reliable than raw TIFFs).
  missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
  cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
  cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))

  # Download each missing date.
  download_count <- 0
  download_failed <- 0
  if (length(missing_dates) > 0) {
    # Change to python_app so the downloader's relative paths work. The
    # finally handler guarantees the working directory is restored even if a
    # call errors mid-loop (the original code leaked the cwd on error because
    # setwd(original_dir) was only reached on the success path).
    original_dir <- getwd()
    setwd("python_app")
    tryCatch({
      for (date_str in missing_dates) {
        cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup',
                       project_dir, date_str)
        result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)
        if (result == 0) {
          download_count <- download_count + 1
        } else {
          download_failed <- download_failed + 1
        }
      }
    }, finally = setwd(original_dir))
  }

  cat(sprintf("✓ Downloaded %d dates, %d failed\n",
              download_count, download_failed))
  if (download_failed > 0) {
    cat("⚠ Some downloads failed, but continuing pipeline\n")
  }

  # Force Script 10 to run ONLY if downloads actually succeeded
  # (not just attempted).
  if (download_count > 0) {
    skip_10 <- FALSE
  }
}, error = function(e) {
  cat("✗ Error in planet download:", e$message, "\n")
  pipeline_success <<- FALSE
})

# ==============================================================================
# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs
# ==============================================================================
if (pipeline_success && !skip_10) {
  cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n")
  tryCatch({
    # Set environment variables for the script (Script 10 uses these for
    # filtering).
    assign("PROJECT", project_dir, envir = .GlobalEnv)

    # Suppress verbose per-date output, show only summary.
    sink(nullfile())
    source("r_app/10_create_master_grid_and_split_tiffs.R")
    sink()

    # Verify output.
    tiles_dir <- file.path("laravel_app", "storage", "app", project_dir,
                           "daily_tiles_split", "5x5")
    if (dir.exists(tiles_dir)) {
      subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE)
      cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n",
                  length(subdirs)))
    } else {
      cat("✓ Script 10 completed\n")
    }
  }, error = function(e) {
    # Drain every open diversion: a bare sink() warns when none is active and
    # would leave nested diversions open if source() opened its own.
    while (sink.number() > 0) sink()
    cat("✗ Error in Script 10:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_10) {
  cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n")
}

# ==============================================================================
# SCRIPT 20: CI EXTRACTION
# ==============================================================================
if (pipeline_success && !skip_20) {
  cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n")
  tryCatch({
    # Set environment variables for the script.
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)

    source("r_app/20_ci_extraction.R")
    main()  # Call main() to execute the script with the environment variables

    # Verify CI output was created.
    ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir,
                              "Data", "extracted_ci", "daily_vals")
    if (dir.exists(ci_daily_dir)) {
      files <- list.files(ci_daily_dir, pattern = "\\.rds$")
      cat(sprintf("✓ Script 20 completed - generated %d CI files\n",
                  length(files)))
    } else {
      cat("✓ Script 20 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 20:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_20) {
  cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) ==========\n")
}

# ==============================================================================
# SCRIPT 21: CONVERT CI RDS TO CSV
# ==============================================================================
if (pipeline_success && !skip_21) {
  cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n")
  tryCatch({
    # Set environment variables for the script.
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)

    source("r_app/21_convert_ci_rds_to_csv.R")
    main()  # Call main() to execute the script with the environment variables

    # Verify CSV output was created.
    ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir,
                             "ci_extracted")
    if (dir.exists(ci_csv_path)) {
      csv_files <- list.files(ci_csv_path, pattern = "\\.csv$")
      cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n",
                  length(csv_files)))
    } else {
      cat("✓ Script 21 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 21:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_21) {
  cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n")
}

# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success) {
  cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
  tryCatch({
    # Set environment variables for the script.
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)

    source("r_app/30_interpolate_growth_model.R")
    main()  # Call main() to execute the script with the environment variables

    # Verify interpolated output.
    growth_dir <- file.path("laravel_app", "storage", "app", project_dir,
                            "growth_model_interpolated")
    if (dir.exists(growth_dir)) {
      files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
      cat(sprintf("✓ Script 30 completed - generated %d growth model files\n",
                  length(files)))
    } else {
      cat("✓ Script 30 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 30:", e$message, "\n")
    pipeline_success <<- FALSE
  })
}

# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
if (pipeline_success) {
  cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n")
  tryCatch({
    # Run Python script in pytorch_gpu conda environment.
    # Script expects positional project name (not --project flag).
    # Run from smartcane root so conda can find the environment.
    cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s',
                   project_dir)
    cat("DEBUG: Running command:", cmd, "\n")
    result <- system(cmd)

    if (result == 0) {
      # Verify harvest output - check for THIS WEEK's specific file
      # (ISO week number via %V).
      current_week <- as.numeric(format(end_date, "%V"))
      current_year <- as.numeric(format(end_date, "%Y"))
      expected_file <- file.path("laravel_app", "storage", "app", project_dir,
                                 "reports", "kpis", "field_stats",
                                 sprintf("%s_harvest_imminent_week_%02d_%d.csv",
                                         project_dir, current_week, current_year))
      if (file.exists(expected_file)) {
        cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n",
                    current_week))
      } else {
        cat("✓ Script 31 completed (check if harvest.xlsx is available)\n")
      }
    } else {
      cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n")
    }
  }, error = function(e) {
    # Non-fatal: harvest output is optional, so pipeline_success is untouched.
    # (The original handler called setwd(original_dir) here, but this section
    # never changes directory and original_dir may be undefined in this scope,
    # which would error inside the handler and mask the real message.)
    cat("⚠ Script 31 error:", e$message, "\n")
  })
}

# ==============================================================================
# SCRIPT 40: MOSAIC CREATION
# ==============================================================================
if (pipeline_success && !skip_40) {
  cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
  tryCatch({
    # Set environment variables for the script.
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)

    source("r_app/40_mosaic_creation.R")
    main()  # Call main() to execute the script with the environment variables

    # Verify mosaic output.
    mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir,
                            "weekly_tile_max", "5x5")
    if (dir.exists(mosaic_dir)) {
      files <- list.files(mosaic_dir, pattern = "\\.tif$")
      cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n",
                  length(files)))
    } else {
      cat("✓ Script 40 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 40:", e$message, "\n")
    pipeline_success <<- FALSE
  })
} else if (skip_40) {
  cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n")
}

# ==============================================================================
# SCRIPT 80: CALCULATE KPIs
# ==============================================================================
if (pipeline_success) {
  # Always run Script 80 - it calculates KPIs for the current week.
  cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
  tryCatch({
    # Set environment variables for the script (Script 80's main() uses these
    # as fallbacks).
    # NOTE: end_date is already a Date, just assign directly without as.Date().
    assign("end_date", end_date, envir = .GlobalEnv)
    assign("end_date_str", end_date_str, envir = .GlobalEnv)
    assign("offset", offset, envir = .GlobalEnv)
    assign("project_dir", project_dir, envir = .GlobalEnv)
    assign("data_source", data_source, envir = .GlobalEnv)

    source("r_app/80_calculate_kpis.R")
    main()  # Call main() to execute the script with the environment variables

    # Verify KPI output.
    kpi_dir <- file.path("laravel_app", "storage", "app", project_dir,
                         "reports", "kpis", "field_stats")
    if (dir.exists(kpi_dir)) {
      files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
      cat(sprintf("✓ Script 80 completed - generated %d KPI files\n",
                  length(files)))
    } else {
      cat("✓ Script 80 completed\n")
    }
  }, error = function(e) {
    cat("✗ Error in Script 80:", e$message, "\n")
    cat("Full error:\n")
    print(e)
    pipeline_success <<- FALSE
  })
}

# ==============================================================================
# SUMMARY
# ==============================================================================
cat("\n========== PIPELINE COMPLETE ==========\n")
cat(sprintf("Project: %s\n", project_dir))
cat(sprintf("End Date: %s\n", end_date_str))
cat(sprintf("Offset: %d days\n", offset))
if (pipeline_success) {
  cat("Status: ✓ All scripts completed successfully\n")
} else {
  cat("Status: ✗ Pipeline failed - check errors above\n")
}
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n")