Adjust scripts to work for all projects

This commit is contained in:
Timon 2026-01-29 09:04:46 +01:00
parent b2de819fc4
commit dd915f9b9e
7 changed files with 385 additions and 115 deletions

View file

@ -89,20 +89,118 @@ def setup_config():
config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44')
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos')
# BYOC collection for Planet 8-band data
collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'
byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True)
catalog = SentinelHubCatalog(config=config)
return config, byoc, catalog
return config, catalog
def _first_date_with_data(catalog, byoc, bbox, date_range) -> Optional[str]:
    """Return the first date in date_range with catalog results for byoc, else None.

    Probes one date at a time; per-date errors are printed and swallowed so a
    transient API failure on one date does not abort detection.
    """
    for test_date in date_range:
        try:
            search = catalog.search(
                collection=byoc,
                bbox=bbox,
                time=(test_date, test_date),
                filter=None
            )
            tiles = list(search)
            if len(tiles) > 0:
                print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
                return test_date
        except Exception as e:
            print(f" ⚠️ {test_date}: {str(e)[:60]}")
    return None


def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple:
    """
    Auto-detect which Planet collection is available for this project.

    Checks a week of dates (backwards from date_str) to ensure robust detection.
    If ANY date has data in the new 8-band collection, use that.
    If no dates have data in new collection, fall back to legacy 4-band.
    If neither collection has data, default to the 8-band collection so a
    download attempt can still be made.

    Args:
        date_str: Reference date (YYYY-MM-DD)
        bbox_list: List of bounding boxes for testing (only the first is probed)
        catalog: SentinelHubCatalog instance
        date_range_days: Number of days to check backwards (default: 7)

    Returns:
        (byoc, collection_info_dict) where byoc is DataCollection and dict contains
        collection_id, name, bands, output_folder and singles_folder metadata.
    """
    new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'  # 8-band (new)
    old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c'  # 4-band (legacy)
    test_bbox = bbox_list[0]

    # Generate date range (backwards from date_str); fall back to today on bad input.
    try:
        ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        print(f"⚠️ Invalid date format: {date_str}. Using today.")
        ref_date = datetime.datetime.now()
    date_range = [
        (ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d')
        for i in range(date_range_days)
    ]
    print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...")
    print(f" Test range: {date_range[-1]} to {date_range[0]}")

    # Metadata returned alongside the chosen DataCollection. NOTE(review):
    # 'bands': 4 for the "8-band" collection matches the 4-band evalscript
    # output used downstream — confirm this is intentional before changing.
    new_info = {
        'collection_id': new_id,
        'name': 'planet_data_8b',
        'bands': 4,
        'output_folder': 'merged_tif_8b',
        'singles_folder': 'single_images_8b'
    }
    old_info = {
        'collection_id': old_id,
        'name': 'planet_data',
        'bands': 4,
        'output_folder': 'merged_tif',
        'singles_folder': 'single_images'
    }

    # Try new collection first.
    print(f"\n Trying 8-band collection: {new_id}")
    byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True)
    if _first_date_with_data(catalog, byoc_new, test_bbox, date_range) is not None:
        print(f" ✓ Using 8-band collection")
        return byoc_new, new_info

    # No data in new collection, try legacy.
    print(f"\n ✗ No data found in 8-band collection")
    print(f" Trying legacy 4-band collection: {old_id}")
    byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True)
    if _first_date_with_data(catalog, byoc_old, test_bbox, date_range) is not None:
        print(f" ✓ Using legacy 4-band collection")
        return byoc_old, old_info

    # Neither collection has data: default to the 8-band collection.
    print(f"\n ⚠️ No data found in either collection for {date_range_days} days")
    print(f" Defaulting to 8-band collection (will attempt download anyway)")
    return byoc_new, new_info
# ============================================================================
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
# ============================================================================
EVALSCRIPT_4BAND_MASKED = """
EVALSCRIPT_8BAND = """
//VERSION=3
function setup() {
return {
@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """
}
function evaluatePixel(sample) {
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
// This reduces output pixels and avoids NaN interpolation on client side
if (sample.udm1 == 0) {
// Scale reflectance: DN [0, 1] range
var scaledRed = 2.5 * sample.red / 10000;
var scaledGreen = 2.5 * sample.green / 10000;
var scaledBlue = 2.5 * sample.blue / 10000;
var scaledNIR = 2.5 * sample.nir / 10000;
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
} else {
return [NaN, NaN, NaN, NaN];
}
}
"""
EVALSCRIPT_4BAND_LEGACY = """
//VERSION=3
function setup() {
return {
input: [{
bands: ["red", "green", "blue", "nir", "udm1"],
units: "DN"
}],
output: {
bands: 4
}
};
}
function evaluatePixel(sample) {
// Cloud masking for legacy collection (same band names as new 8-band)
// udm1 = 0 means clear, non-zero means cloud/shadow/etc
if (sample.udm1 == 0) {
var scaledRed = 2.5 * sample.red / 10000;
var scaledGreen = 2.5 * sample.green / 10000;
var scaledBlue = 2.5 * sample.blue / 10000;
@ -289,6 +413,7 @@ def download_tile(
output_dir: Path,
config,
byoc,
evalscript: str,
resolution: int = 3
) -> bool:
"""Download a single full tile (no geometry masking = lower PU) with exponential backoff."""
@ -300,9 +425,9 @@ def download_tile(
try:
size = bbox_to_dimensions(bbox, resolution=resolution)
# Create download request with 4-band cloud-masked evalscript (uint16)
# Create download request with appropriate evalscript for collection
request = SentinelHubRequest(
evalscript=EVALSCRIPT_4BAND_MASKED,
evalscript=evalscript,
input_data=[
SentinelHubRequest.input_data(
data_collection=byoc,
@ -350,6 +475,8 @@ def download_date(
base_path: Path,
config,
byoc,
evalscript: str,
collection_info: dict,
resolution: int = 3
) -> int:
"""
@ -357,14 +484,14 @@ def download_date(
Returns number of successfully downloaded tiles.
"""
output_dir = base_path / 'single_images_8b' / date_str
output_dir = base_path / collection_info['singles_folder'] / date_str
output_dir.mkdir(parents=True, exist_ok=True)
print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...")
successful = 0
for idx, bbox in enumerate(bbox_list, 1):
if download_tile(date_str, bbox, output_dir, config, byoc, resolution):
if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution):
successful += 1
percentage = (idx / len(bbox_list)) * 100
@ -385,10 +512,10 @@ def download_date(
# MERGE FUNCTION
# ============================================================================
def merge_tiles(date_str: str, base_path: Path) -> bool:
def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool:
"""Merge downloaded tiles into single GeoTIFF using GDAL."""
single_images_dir = base_path / 'single_images_8b' / date_str
single_images_dir = base_path / collection_info['singles_folder'] / date_str
# Find all response.tiff files
file_list = [str(p) for p in single_images_dir.rglob('response.tiff')]
@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool:
print(f" ✗ No tiles found to merge")
return False
merged_tif_dir = base_path / 'merged_tif_8b'
merged_vrt_dir = base_path / 'merged_virtual_8b'
merged_tif_dir = base_path / collection_info['output_folder']
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
merged_tif_dir.mkdir(parents=True, exist_ok=True)
merged_vrt_dir.mkdir(parents=True, exist_ok=True)
@ -453,9 +580,9 @@ def main():
# Parse arguments
parser = argparse.ArgumentParser(
description='Download Planet 8-band imagery with PU optimization'
description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)'
)
parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)')
parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)')
parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
@ -481,7 +608,7 @@ def main():
date_str = datetime.date.today().strftime('%Y-%m-%d')
print(f"{'='*70}")
print(f"Planet 8-Band Download - PU Optimized")
print(f"Planet Download - Auto-Detecting Collection (PU Optimized)")
print(f"{'='*70}")
print(f"Project: {args.project}")
print(f"Date: {date_str}")
@ -489,7 +616,7 @@ def main():
# Setup SentinelHub
print(f"\nSetting up SentinelHub...")
config, byoc, catalog = setup_config()
config, catalog = setup_config()
print(f"✓ SentinelHub configured")
# Load geometries
@ -504,15 +631,26 @@ def main():
print(f"\n✗ No tiles intersect field geometries. Exiting.")
sys.exit(1)
# Auto-detect collection and get evalscript
byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)
# Get appropriate evalscript
evalscript = EVALSCRIPT_8BAND if collection_info['bands'] == 4 and 'new' not in collection_info.get('note', '') else EVALSCRIPT_8BAND
if '4e56d0cb' not in collection_info['collection_id']:
evalscript = EVALSCRIPT_4BAND_LEGACY
print(f"\n Collection: {collection_info['name']}")
print(f" Output folder: {collection_info['output_folder']}/")
# Check date availability
print(f"\nChecking data availability...")
print(f"\nChecking data availability for {date_str}...")
if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
sys.exit(0)
# Download tiles
print(f"\n{'='*70}")
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution)
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)
if downloaded == 0:
print(f"\n✗ No tiles downloaded. Exiting.")
@ -522,20 +660,20 @@ def main():
if not args.skip_merge:
print(f"\n{'='*70}")
print(f"Merging tiles...")
if merge_tiles(date_str, base_path):
if merge_tiles(date_str, base_path, collection_info):
print(f"✓ Merge complete")
# Cleanup intermediate files
if args.cleanup:
print(f"\nCleaning up intermediate files...")
import shutil
single_images_dir = base_path / 'single_images_8b' / date_str
merged_vrt_dir = base_path / 'merged_virtual_8b'
single_images_dir = base_path / collection_info['singles_folder'] / date_str
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
try:
if single_images_dir.exists():
shutil.rmtree(single_images_dir)
print(f" ✓ Deleted {single_images_dir.name}/{date_str}")
print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}")
# Clean old VRT files
for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
@ -549,7 +687,7 @@ def main():
print(f"\n{'='*70}")
print(f"✓ Done!")
print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}")
print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}")
print(f"{'='*70}")

View file

@ -41,8 +41,9 @@ main <- function() {
args <- commandArgs(trailingOnly = TRUE)
# Process end_date argument
if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1])
if (length(args) >= 1 && !is.na(args[1]) && args[1] != "") {
# Parse date explicitly in YYYY-MM-DD format from command line
end_date <- as.Date(args[1], format = "%Y-%m-%d")
if (is.na(end_date)) {
warning("Invalid end_date provided. Using default (current date).")
end_date <- Sys.Date()

View file

@ -50,7 +50,8 @@ main <- function() {
# Process end_date argument with default
if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1])
# Parse date explicitly in YYYY-MM-DD format from command line
end_date <- as.Date(args[1], format = "%Y-%m-%d")
if (is.na(end_date)) {
message("Invalid end_date provided. Using current date.")
end_date <- Sys.Date()
@ -96,18 +97,18 @@ main <- function() {
assign("data_source", data_source, envir = .GlobalEnv)
tryCatch({
source("parameters_project.R")
source("40_mosaic_creation_utils.R")
safe_log(paste("Successfully sourced files from default directory."))
source("r_app/parameters_project.R")
source("r_app/40_mosaic_creation_utils.R")
safe_log(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) {
message("Note: Could not open files from default directory (expected on some systems)")
message("Attempting to source from 'r_app' directory instead...")
message("Note: Could not open files from r_app directory")
message("Attempting to source from default directory instead...")
tryCatch({
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "40_mosaic_creation_utils.R"))
message("✓ Successfully sourced files from 'r_app' directory")
source("parameters_project.R")
source("40_mosaic_creation_utils.R")
message("✓ Successfully sourced files from default directory")
}, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.")
stop("Failed to source required files from both 'r_app' and default directories.")
})
})

View file

@ -253,7 +253,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda
missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
aggregated_results[[tif_idx]] <- data.frame(
filename = tif_file,
filename = basename(tif_file),
notNA = total_notna,
total_pixels = total_pixels,
missing_pixels_percentage = missing_pct,
@ -265,7 +265,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda
}, error = function(e) {
safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING")
aggregated_results[[tif_idx]] <<- data.frame(
filename = tif_file,
filename = basename(tif_file),
notNA = NA_real_,
total_pixels = NA_real_,
missing_pixels_percentage = 100,
@ -543,8 +543,12 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS
# Create output directory if it doesn't exist
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Create full file path
file_path <- here::here(output_dir, file_name)
# Create full file path - use file.path() since output_dir may be absolute path
# Ensure file_name has .tif extension
if (!grepl("\\.tif$|\\.TIF$", file_name)) {
file_name <- paste0(file_name, ".tif")
}
file_path <- file.path(output_dir, file_name)
# Get cloud mask if it exists
cloud_mask <- attr(mosaic_raster, "cloud_mask")

View file

@ -181,7 +181,8 @@ main <- function() {
# end_date (arg 1)
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
# Parse date explicitly in YYYY-MM-DD format from command line
as.Date(args[1], format = "%Y-%m-%d")
} else if (exists("end_date", envir = .GlobalEnv)) {
global_date <- get("end_date", envir = .GlobalEnv)
# Check if it's a valid Date with length > 0
@ -239,6 +240,11 @@ main <- function() {
stop("Error loading parameters_project.R: ", e$message)
})
# Define paths for mosaic detection (used in PHASE 1)
base_project_path <- file.path("laravel_app", "storage", "app", project_dir)
weekly_tile_max <- file.path(base_project_path, "weekly_tile_max")
weekly_mosaic <- file.path(base_project_path, "weekly_mosaic")
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
}, error = function(e) {

View file

@ -16,7 +16,34 @@ suppressPackageStartupMessages({
library(jsonlite) # For reading tiling_config.json
})
# 2. Smart detection for tile-based vs single-file mosaic approach
# 2. Client type mapping (for conditional script execution)
# ---------------------------------------------------------
# Maps project names to client types for pipeline control
# Client types:
# - "cane_supply": Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output)
# - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report)
# - "extension_service": (Future - not yet implemented)
#
# NOTE: This will eventually migrate to Laravel environment variables/database
# For now, maintain this mapping and update as projects are added
# Project-name → client-type lookup used to decide which pipeline scripts run.
# Unknown projects fall back to the full "cane_supply" pipeline (with a warning).
CLIENT_TYPE_MAP <- list(
  angata   = "cane_supply",
  aura     = "agronomic_support",
  chemba   = "cane_supply",
  xinavane = "cane_supply",
  esa      = "cane_supply"
)

# Resolve a project's client type from CLIENT_TYPE_MAP.
# Returns "cane_supply" (and warns) when the project is not mapped.
get_client_type <- function(project_name) {
  mapped_type <- CLIENT_TYPE_MAP[[project_name]]
  if (!is.null(mapped_type)) {
    return(mapped_type)
  }
  warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name))
  "cane_supply"
}
# 3. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
# PRIORITY 1: Check for tiling_config.json metadata file from script 10
@ -112,7 +139,7 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL
))
}
# 2. Define project directory structure
# 4. Define project directory structure
# -----------------------------------
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
# Base directories
@ -185,7 +212,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
}
#set working dir.
# 3. Load field boundaries
# 5. Load field boundaries
# ----------------------
load_field_boundaries <- function(data_dir) {
# Choose field boundaries file based on project and script type
@ -279,7 +306,7 @@ load_field_boundaries <- function(data_dir) {
})
}
# 4. Load harvesting data
# 6. Load harvesting data
# ---------------------
load_harvesting_data <- function(data_dir) {
harvest_file <- here(data_dir, "harvest.xlsx")
@ -370,7 +397,7 @@ log_head <- function(list, level = "INFO") {
log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
}
# 6. Set up full logging system with file output
# 8. Set up full logging system with file output
# -------------------------------------------
setup_logging <- function(log_dir) {
log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))
@ -402,7 +429,7 @@ setup_logging <- function(log_dir) {
))
}
# 7. Initialize the project
# 9. Initialize the project
# ----------------------
# Export project directories and settings
initialize_project <- function(project_dir, data_source = "merged_tif_8b") {

View file

@ -30,19 +30,48 @@
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
end_date <- as.Date("2025-12-31") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba"
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
# ***************************
# Load client type mapping from parameters_project.R
source("r_app/parameters_project.R")
client_type <- get_client_type(project_dir)
cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type))
# Format dates
end_date_str <- format(as.Date(end_date), "%Y-%m-%d")
# Track success of pipeline
pipeline_success <- TRUE
# Define conditional script execution based on client type
# Client types:
# - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output)
# - "agronomic_support": Runs Scripts 20,30,80,90 only (KPI calculation + Word report)
#
# Scripts that ALWAYS run (regardless of client type):
# - 00: Python Download
# - 10: Tiling (if outputs don't exist)
# - 20: CI Extraction
# - 30: Growth Model
# - 40: Mosaic Creation
# - 80: KPI Calculation
#
# Scripts that are client-type specific:
# - 21: CI RDS→CSV (cane_supply only)
# - 22: (cane_supply only)
# - 23: (cane_supply only)
# - 31: Harvest Imminent (cane_supply only)
# - 90: Legacy Word Report (agronomic_support only)
# - 91: Modern Excel Report (cane_supply only)
skip_cane_supply_only <- (client_type != "cane_supply") # Skip Scripts 21,22,23,31 for non-cane_supply
run_legacy_report <- (client_type == "agronomic_support") # Script 90 for agronomic support
run_modern_report <- (client_type == "cane_supply") # Script 91 for cane supply
# ==============================================================================
# INTELLIGENT CHECKING: What has already been completed?
# ==============================================================================
@ -75,16 +104,21 @@ detect_mosaic_mode_simple <- function(project_dir) {
mosaic_mode <- detect_mosaic_mode_simple(project_dir)
cat(sprintf("Auto-detected mosaic mode: %s\n", mosaic_mode))
# Check Script 10 outputs - look for daily_tiles_split/{GRID_SIZE} (flexible grid detection)
# Check Script 10 outputs - FLEXIBLE: look for tiles either directly OR in grid subdirs
tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")
tiles_dates <- c()
if (dir.exists(tiles_split_base)) {
# Look for any grid-size subdirectories (5x5, 10x10, etc.)
# Try grid-size subdirectories first (5x5, 10x10, etc.) - preferred new structure
subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
# New structure: daily_tiles_split/{grid_size}/{dates}/
grid_dir <- file.path(tiles_split_base, grid_patterns[1])
tiles_dates <- list.dirs(grid_dir, full.names = FALSE, recursive = FALSE)
} else {
# Old structure: daily_tiles_split/{dates}/ (no grid-size subfolder)
tiles_dates <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
}
}
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
@ -103,23 +137,28 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
# For now, just note that CSV is time-dependent, not a good skip indicator
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
# Check Script 40 outputs (mosaics) - flexible detection for both tile-based and single-file
# Check Script 40 outputs (mosaics) - check for THIS WEEK's mosaic specifically
# (important for Script 80, which needs the current week's mosaic)
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%Y"))
week_mosaic_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
mosaic_files <- c()
if (mosaic_mode == "tiled") {
# For tile-based: look in weekly_tile_max/{grid_size}/
# For tile-based: look in weekly_tile_max/{grid_size}/ for this week's file
weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
mosaic_dir <- file.path(weekly_tile_max, grid_patterns[1])
mosaic_files <- list.files(mosaic_dir, pattern = "\\.tif$")
mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern)
}
} else if (mosaic_mode == "single-file") {
# For single-file: look in weekly_mosaic/
# For single-file: look in weekly_mosaic/ for this week's file
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
mosaic_files <- list.files(mosaic_dir, pattern = "^week_.*\\.tif$")
mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern)
}
cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files)))
cat(sprintf("Script 40: %d mosaic files exist for week %02d\n", length(mosaic_files), current_week))
# Check Script 80 outputs (KPIs in reports/kpis/field_stats)
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
@ -130,19 +169,29 @@ kpi_files <- if (dir.exists(kpi_dir)) {
}
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
# Determine if scripts should run based on outputs
skip_10 <- length(tiles_dates) > 0 && !force_rerun
skip_20 <- length(ci_files) > 0 && !force_rerun
skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped
skip_40 <- length(mosaic_files) > 0 && !force_rerun
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
# Determine if scripts should run based on outputs AND client type
skip_10 <- (length(tiles_dates) > 0 && !force_rerun) # Always check tiles
skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data
skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion)
skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients
skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients
skip_30 <- FALSE # Script 30 ALWAYS runs for all client types
skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients
skip_40 <- (length(mosaic_files) > 0 && !force_rerun) # Always check mosaics
skip_80 <- FALSE # Script 80 ALWAYS runs for all client types - calculates KPIs for current week
cat("\nSkipping decisions:\n")
cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP (tiles exist)" else "RUN"))
cat(sprintf(" Script 20: %s\n", if(skip_20) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 21: %s\n", if(skip_21) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 40: %s\n", if(skip_40) "SKIP (mosaics exist)" else "RUN"))
cat(sprintf(" Script 80: %s\n", if(skip_80) "SKIP (KPIs exist)" else "RUN"))
cat("\nSkipping decisions (based on outputs AND client type):\n")
cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP" else "RUN"))
cat(sprintf(" Script 20: RUN (always runs to process new downloads)\n"))
cat(sprintf(" Script 21: %s %s\n", if(skip_21) "SKIP" else "RUN", if(skip_cane_supply_only && !skip_21) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 22: %s %s\n", if(skip_22) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 23: %s %s\n", if(skip_23) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 30: %s (always runs)\n", if(skip_30) "SKIP" else "RUN"))
cat(sprintf(" Script 31: %s %s\n", if(skip_31) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 40: %s %s\n", if(skip_40) "SKIP" else "RUN", if(!skip_40) "" else "(mosaics exist)"))
cat(sprintf(" Script 80: %s (always runs)\n", if(skip_80) "SKIP" else "RUN"))
cat(sprintf(" Script 90: %s %s\n", if(!run_legacy_report) "SKIP" else "RUN", if(run_legacy_report) "(agronomic_support legacy report)" else ""))
cat(sprintf(" Script 91: %s %s\n", if(!run_modern_report) "SKIP" else "RUN", if(run_modern_report) "(cane_supply modern report)" else ""))
# ==============================================================================
# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY)
@ -157,18 +206,28 @@ tryCatch({
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
# Get existing dates from tiles (better indicator of completion)
# Get existing dates from tiles (better indicator of completion for tiled projects)
existing_tile_dates <- tiles_dates
# For single-file projects, use raw TIFF files as the indicator instead
# This prevents re-downloading data that already exists
if (mosaic_mode == "single-file" && length(existing_tiff_dates) > 0) {
existing_tile_dates <- existing_tiff_dates
}
# Find missing dates in the window
start_date <- end_date - offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
# Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file)
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
if (mosaic_mode == "single-file") {
cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates)))
} else {
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
}
cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
# Download each missing date
@ -217,6 +276,12 @@ tryCatch({
if (pipeline_success && !skip_10) {
cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n")
tryCatch({
# CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.)
saved_end_date <- end_date
saved_offset <- offset
saved_project_dir <- project_dir
saved_data_source <- data_source
# Set environment variables for the script (Script 10 uses these for filtering)
assign("PROJECT", project_dir, envir = .GlobalEnv)
@ -225,6 +290,12 @@ if (pipeline_success && !skip_10) {
source("r_app/10_create_master_grid_and_split_tiffs.R")
sink()
# CRITICAL: Restore global variables after sourcing Script 10
end_date <- saved_end_date
offset <- saved_offset
project_dir <- saved_project_dir
data_source <- saved_data_source
# Verify output
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
if (dir.exists(tiles_dir)) {
@ -248,14 +319,15 @@ if (pipeline_success && !skip_10) {
if (pipeline_success && !skip_20) {
cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 20 via system() to pass command-line args just like from terminal
# Arguments: end_date offset project_dir data_source
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
result <- system(cmd)
source("r_app/20_ci_extraction.R")
main() # Call main() to execute the script with the environment variables
if (result != 0) {
stop("Script 20 exited with error code:", result)
}
# Verify CI output was created
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
@ -306,17 +378,18 @@ if (pipeline_success && !skip_21) {
# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success) {
if (pipeline_success && !skip_30) {
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 30 via system() to pass command-line args just like from terminal
# Script 30 expects: project_dir as first argument only
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s"',
project_dir)
result <- system(cmd)
source("r_app/30_interpolate_growth_model.R")
main() # Call main() to execute the script with the environment variables
if (result != 0) {
stop("Script 30 exited with error code:", result)
}
# Verify interpolated output
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
@ -335,7 +408,7 @@ if (pipeline_success) {
# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
# NOTE(review): duplicated guard lines -- old unconditional form vs. new
# skip_31-aware form from a diff; only one should survive in the real file.
if (pipeline_success) {
if (pipeline_success && !skip_31) {
cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n")
tryCatch({
# Run Python script in pytorch_gpu conda environment
@ -364,6 +437,8 @@ if (pipeline_success) {
# Error handler tail for Script 31: restore the working directory that the
# tryCatch body changed (presumably via setwd earlier -- not visible here),
# then log the failure without stopping the pipeline (best-effort step).
setwd(original_dir)
cat("⚠ Script 31 error:", e$message, "\n")
})
} else if (skip_31) {
# skip_31 is set for non-cane_supply client types (per the message below).
cat("\n========== SKIPPING SCRIPT 31 (non-cane_supply client type) ==========\n")
}
# ==============================================================================
@ -372,20 +447,41 @@ if (pipeline_success) {
# Script 40: build the weekly mosaic for the project, then verify that the
# expected output file for the current ISO week exists.
if (pipeline_success && !skip_40) {
cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
tryCatch({
# Set environment variables for the script
# (globals act as fallbacks for the source()d script's main()).
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 40 via system() to pass command-line args just like from terminal
# Use full path and --vanilla to avoid renv/environment issues
# Arguments: end_date offset project_dir (file_name_tif is auto-generated from dates)
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" %d "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir)
result <- system(cmd)
# NOTE(review): system() above and source()/main() below both execute Script 40
# (double run) -- likely an unresolved old/new diff pair; keep only one.
source("r_app/40_mosaic_creation.R")
main()  # Call main() to execute the script with the environment variables
if (result != 0) {
stop("Script 40 exited with error code:", result)
}
# NOTE(review): the next five lines are the legacy verification block (count all
# .tif files in the tiled dir) and the block after them is its replacement
# (mode-aware, current-week-only check) -- another old/new diff pair.
# Verify mosaic output
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir)) {
files <- list.files(mosaic_dir, pattern = "\\.tif$")
cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", length(files)))
# Verify mosaic output - check based on mosaic mode (tiled vs single-file)
mosaic_files_check <- c()
if (mosaic_mode == "tiled") {
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir)) {
# Check for current week's file only
# ("%V" = ISO-8601 week number, "%Y" = 4-digit year)
current_week_check <- as.numeric(format(end_date, "%V"))
current_year_check <- as.numeric(format(end_date, "%Y"))
week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
}
} else {
# Single-file mode: same current-week pattern, different output directory.
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
if (dir.exists(mosaic_dir)) {
# Check for current week's file only
current_week_check <- as.numeric(format(end_date, "%V"))
current_year_check <- as.numeric(format(end_date, "%Y"))
week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
}
}
if (length(mosaic_files_check) > 0) {
# NOTE(review): `current_week` is not defined in this fragment (only
# `current_week_check` is) -- presumably set earlier in the script; if not,
# this line errors inside tryCatch. Verify, or use current_week_check here.
cat(sprintf("✓ Script 40 completed - created mosaic for week %02d\n", current_week))
} else {
cat("✓ Script 40 completed\n")
}
@ -403,16 +499,15 @@ if (pipeline_success && !skip_40) {
# Script 80: compute the weekly KPIs. Runs whenever the pipeline is still
# healthy -- there is deliberately no skip flag for this step.
if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week
cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
tryCatch({
# Set environment variables for the script (Script 80's main() uses these as fallbacks)
# NOTE: end_date is already a Date, just assign directly without as.Date()
assign("end_date", end_date, envir = .GlobalEnv)
assign("end_date_str", end_date_str, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 80 via system() to pass command-line args just like from terminal
# Use full path and --vanilla to avoid renv/environment issues
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" %d "%s" "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
result <- system(cmd)
# NOTE(review): system() above and source()/main() below both execute Script 80
# (double run) -- likely an unresolved old/new diff pair; keep only one.
source("r_app/80_calculate_kpis.R")
main()  # Call main() to execute the script with the environment variables
# Checks only the system() exit status, not failures inside main().
if (result != 0) {
stop("Script 80 exited with error code:", result)
}
# Verify KPI output
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
@ -424,8 +519,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
}
# Error handler for Script 80: log the failure and mark the whole pipeline as
# failed. `<<-` assigns to pipeline_success in the enclosing (script) scope so
# the downstream `if (pipeline_success)` guards are skipped.
}, error = function(e) {
cat("✗ Error in Script 80:", e$message, "\n")
# NOTE(review): the verbose print below appears to be the removed half of a
# diff (dropped in favour of the one-line message above).
cat("Full error:\n")
print(e)
pipeline_success <<- FALSE
})
}