diff --git a/python_app/00_download_8band_pu_optimized.py b/python_app/00_download_8band_pu_optimized.py index 8c0991b..2d4fbb3 100644 --- a/python_app/00_download_8band_pu_optimized.py +++ b/python_app/00_download_8band_pu_optimized.py @@ -89,20 +89,118 @@ def setup_config(): config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44') config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos') - # BYOC collection for Planet 8-band data - collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a' - byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True) - catalog = SentinelHubCatalog(config=config) - return config, byoc, catalog + return config, catalog + + +def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple: + """ + Auto-detect which Planet collection is available for this project. + + Checks a week of dates (backwards from date_str) to ensure robust detection. + If ANY date has data in the new 8-band collection, use that. + If no dates have data in new collection, fall back to legacy 4-band. + + Args: + date_str: Reference date (YYYY-MM-DD) + bbox_list: List of bounding boxes for testing + catalog: SentinelHubCatalog instance + date_range_days: Number of days to check backwards (default: 7) + + Returns: + (byoc, collection_info_dict) where byoc is DataCollection and dict contains metadata + """ + + new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a' # 8-band (new) + old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c' # 4-band (legacy) + test_bbox = bbox_list[0] + + # Generate date range (backwards from date_str) + try: + ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d') + except ValueError: + print(f"⚠️ Invalid date format: {date_str}. 
Using today.") + ref_date = datetime.datetime.now() + + date_range = [ + (ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d') + for i in range(date_range_days) + ] + + print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...") + print(f" Test range: {date_range[-1]} to {date_range[0]}") + + # Try new collection first + print(f"\n Trying 8-band collection: {new_id}") + byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True) + + for test_date in date_range: + try: + search = catalog.search( + collection=byoc_new, + bbox=test_bbox, + time=(test_date, test_date), + filter=None + ) + tiles = list(search) + if len(tiles) > 0: + print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)") + print(f" ✓ Using 8-band collection") + return byoc_new, { + 'collection_id': new_id, + 'name': 'planet_data_8b', + 'bands': 4, + 'output_folder': 'merged_tif_8b', + 'singles_folder': 'single_images_8b' + } + except Exception as e: + print(f" ⚠️ {test_date}: {str(e)[:60]}") + + # No data in new collection, try legacy + print(f"\n ✗ No data found in 8-band collection") + print(f" Trying legacy 4-band collection: {old_id}") + byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True) + + for test_date in date_range: + try: + search = catalog.search( + collection=byoc_old, + bbox=test_bbox, + time=(test_date, test_date), + filter=None + ) + tiles = list(search) + if len(tiles) > 0: + print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)") + print(f" ✓ Using legacy 4-band collection") + return byoc_old, { + 'collection_id': old_id, + 'name': 'planet_data', + 'bands': 4, + 'output_folder': 'merged_tif', + 'singles_folder': 'single_images' + } + except Exception as e: + print(f" ⚠️ {test_date}: {str(e)[:60]}") + + # Neither collection has data + print(f"\n ⚠️ No data found in either collection for {date_range_days} days") + print(f" Defaulting to 8-band collection (will attempt download anyway)") + 
return byoc_new, { + 'collection_id': new_id, + 'name': 'planet_data_8b', + 'bands': 4, + 'output_folder': 'merged_tif_8b', + 'singles_folder': 'single_images_8b' + } # ============================================================================ # EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output # ============================================================================ -EVALSCRIPT_4BAND_MASKED = """ +EVALSCRIPT_8BAND = """ //VERSION=3 function setup() { return { @@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """ } function evaluatePixel(sample) { // Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0) - // This reduces output pixels and avoids NaN interpolation on client side if (sample.udm1 == 0) { - // Scale reflectance: DN → [0, 1] range + var scaledRed = 2.5 * sample.red / 10000; + var scaledGreen = 2.5 * sample.green / 10000; + var scaledBlue = 2.5 * sample.blue / 10000; + var scaledNIR = 2.5 * sample.nir / 10000; + return [scaledRed, scaledGreen, scaledBlue, scaledNIR]; + } else { + return [NaN, NaN, NaN, NaN]; + } + } +""" + +EVALSCRIPT_4BAND_LEGACY = """ + //VERSION=3 + function setup() { + return { + input: [{ + bands: ["red", "green", "blue", "nir", "udm1"], + units: "DN" + }], + output: { + bands: 4 + } + }; + } + function evaluatePixel(sample) { + // Cloud masking for legacy collection (same band names as new 8-band) + // udm1 = 0 means clear, non-zero means cloud/shadow/etc + if (sample.udm1 == 0) { var scaledRed = 2.5 * sample.red / 10000; var scaledGreen = 2.5 * sample.green / 10000; var scaledBlue = 2.5 * sample.blue / 10000; @@ -289,6 +413,7 @@ def download_tile( output_dir: Path, config, byoc, + evalscript: str, resolution: int = 3 ) -> bool: """Download a single full tile (no geometry masking = lower PU) with exponential backoff.""" @@ -300,9 +425,9 @@ def download_tile( try: size = bbox_to_dimensions(bbox, resolution=resolution) - # Create download request with 4-band cloud-masked evalscript (uint16) + # Create 
download request with appropriate evalscript for collection request = SentinelHubRequest( - evalscript=EVALSCRIPT_4BAND_MASKED, + evalscript=evalscript, input_data=[ SentinelHubRequest.input_data( data_collection=byoc, @@ -350,6 +475,8 @@ def download_date( base_path: Path, config, byoc, + evalscript: str, + collection_info: dict, resolution: int = 3 ) -> int: """ @@ -357,14 +484,14 @@ def download_date( Returns number of successfully downloaded tiles. """ - output_dir = base_path / 'single_images_8b' / date_str + output_dir = base_path / collection_info['singles_folder'] / date_str output_dir.mkdir(parents=True, exist_ok=True) print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...") successful = 0 for idx, bbox in enumerate(bbox_list, 1): - if download_tile(date_str, bbox, output_dir, config, byoc, resolution): + if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution): successful += 1 percentage = (idx / len(bbox_list)) * 100 @@ -385,10 +512,10 @@ def download_date( # MERGE FUNCTION # ============================================================================ -def merge_tiles(date_str: str, base_path: Path) -> bool: +def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool: """Merge downloaded tiles into single GeoTIFF using GDAL.""" - single_images_dir = base_path / 'single_images_8b' / date_str + single_images_dir = base_path / collection_info['singles_folder'] / date_str # Find all response.tiff files file_list = [str(p) for p in single_images_dir.rglob('response.tiff')] @@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool: print(f" ✗ No tiles found to merge") return False - merged_tif_dir = base_path / 'merged_tif_8b' - merged_vrt_dir = base_path / 'merged_virtual_8b' + merged_tif_dir = base_path / collection_info['output_folder'] + merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}" merged_tif_dir.mkdir(parents=True, 
exist_ok=True) merged_vrt_dir.mkdir(parents=True, exist_ok=True) @@ -453,9 +580,9 @@ def main(): # Parse arguments parser = argparse.ArgumentParser( - description='Download Planet 8-band imagery with PU optimization' + description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)' ) - parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)') + parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)') parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today') parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)') parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)') @@ -481,7 +608,7 @@ def main(): date_str = datetime.date.today().strftime('%Y-%m-%d') print(f"{'='*70}") - print(f"Planet 8-Band Download - PU Optimized") + print(f"Planet Download - Auto-Detecting Collection (PU Optimized)") print(f"{'='*70}") print(f"Project: {args.project}") print(f"Date: {date_str}") @@ -489,7 +616,7 @@ def main(): # Setup SentinelHub print(f"\nSetting up SentinelHub...") - config, byoc, catalog = setup_config() + config, catalog = setup_config() print(f"✓ SentinelHub configured") # Load geometries @@ -504,15 +631,26 @@ def main(): print(f"\n✗ No tiles intersect field geometries. 
Exiting.") sys.exit(1) + # Auto-detect collection and get evalscript + byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7) + + # Get appropriate evalscript (default: 8-band; switch to legacy below if needed) + evalscript = EVALSCRIPT_8BAND + if '4e56d0cb' not in collection_info['collection_id']: + evalscript = EVALSCRIPT_4BAND_LEGACY + + print(f"\n Collection: {collection_info['name']}") + print(f" Output folder: {collection_info['output_folder']}/") + # Check date availability - print(f"\nChecking data availability...") + print(f"\nChecking data availability for {date_str}...") if not check_date_has_data(date_str, bbox_list[0], catalog, byoc): print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.") sys.exit(0) # Download tiles print(f"\n{'='*70}") - downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution) + downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution) if downloaded == 0: print(f"\n✗ No tiles downloaded. 
Exiting.") @@ -522,20 +660,20 @@ def main(): if not args.skip_merge: print(f"\n{'='*70}") print(f"Merging tiles...") - if merge_tiles(date_str, base_path): + if merge_tiles(date_str, base_path, collection_info): print(f"✓ Merge complete") # Cleanup intermediate files if args.cleanup: print(f"\nCleaning up intermediate files...") import shutil - single_images_dir = base_path / 'single_images_8b' / date_str - merged_vrt_dir = base_path / 'merged_virtual_8b' + single_images_dir = base_path / collection_info['singles_folder'] / date_str + merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}" try: if single_images_dir.exists(): shutil.rmtree(single_images_dir) - print(f" ✓ Deleted {single_images_dir.name}/{date_str}") + print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}") # Clean old VRT files for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"): @@ -549,7 +687,7 @@ def main(): print(f"\n{'='*70}") print(f"✓ Done!") - print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}") + print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}") print(f"{'='*70}") diff --git a/r_app/20_ci_extraction.R b/r_app/20_ci_extraction.R index ab82188..1f751ae 100644 --- a/r_app/20_ci_extraction.R +++ b/r_app/20_ci_extraction.R @@ -41,8 +41,9 @@ main <- function() { args <- commandArgs(trailingOnly = TRUE) # Process end_date argument - if (length(args) >= 1 && !is.na(args[1])) { - end_date <- as.Date(args[1]) + if (length(args) >= 1 && !is.na(args[1]) && args[1] != "") { + # Parse date explicitly in YYYY-MM-DD format from command line + end_date <- as.Date(args[1], format = "%Y-%m-%d") if (is.na(end_date)) { warning("Invalid end_date provided. 
Using default (current date).") end_date <- Sys.Date() diff --git a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R index 7efb281..bf9ced1 100644 --- a/r_app/40_mosaic_creation.R +++ b/r_app/40_mosaic_creation.R @@ -50,7 +50,8 @@ main <- function() { # Process end_date argument with default if (length(args) >= 1 && !is.na(args[1])) { - end_date <- as.Date(args[1]) + # Parse date explicitly in YYYY-MM-DD format from command line + end_date <- as.Date(args[1], format = "%Y-%m-%d") if (is.na(end_date)) { message("Invalid end_date provided. Using current date.") end_date <- Sys.Date() @@ -96,18 +97,18 @@ main <- function() { assign("data_source", data_source, envir = .GlobalEnv) tryCatch({ - source("parameters_project.R") - source("40_mosaic_creation_utils.R") - safe_log(paste("Successfully sourced files from default directory.")) + source("r_app/parameters_project.R") + source("r_app/40_mosaic_creation_utils.R") + safe_log(paste("Successfully sourced files from 'r_app' directory.")) }, error = function(e) { - message("Note: Could not open files from default directory (expected on some systems)") - message("Attempting to source from 'r_app' directory instead...") + message("Note: Could not open files from r_app directory") + message("Attempting to source from default directory instead...") tryCatch({ - source(here::here("r_app", "parameters_project.R")) - source(here::here("r_app", "40_mosaic_creation_utils.R")) - message("✓ Successfully sourced files from 'r_app' directory") + source("parameters_project.R") + source("40_mosaic_creation_utils.R") + message("✓ Successfully sourced files from default directory") }, error = function(e) { - stop("Failed to source required files from both default and 'r_app' directories.") + stop("Failed to source required files from both 'r_app' and default directories.") }) }) diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R index dc7b778..bb9671a 100644 --- a/r_app/40_mosaic_creation_utils.R +++ 
b/r_app/40_mosaic_creation_utils.R @@ -253,7 +253,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) aggregated_results[[tif_idx]] <- data.frame( - filename = tif_file, + filename = basename(tif_file), notNA = total_notna, total_pixels = total_pixels, missing_pixels_percentage = missing_pct, @@ -265,7 +265,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda }, error = function(e) { safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING") aggregated_results[[tif_idx]] <<- data.frame( - filename = tif_file, + filename = basename(tif_file), notNA = NA_real_, total_pixels = NA_real_, missing_pixels_percentage = 100, @@ -543,8 +543,12 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS # Create output directory if it doesn't exist dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - # Create full file path - file_path <- here::here(output_dir, file_name) + # Create full file path - use file.path() since output_dir may be absolute path + # Ensure file_name has .tif extension + if (!grepl("\\.tif$|\\.TIF$", file_name)) { + file_name <- paste0(file_name, ".tif") + } + file_path <- file.path(output_dir, file_name) # Get cloud mask if it exists cloud_mask <- attr(mosaic_raster, "cloud_mask") diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 40cfefb..22a7d38 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -181,7 +181,8 @@ main <- function() { # end_date (arg 1) # Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date() end_date <- if (length(args) >= 1 && !is.na(args[1])) { - as.Date(args[1]) + # Parse date explicitly in YYYY-MM-DD format from command line + as.Date(args[1], format = "%Y-%m-%d") } else if (exists("end_date", envir = .GlobalEnv)) { global_date <- 
get("end_date", envir = .GlobalEnv) # Check if it's a valid Date with length > 0 @@ -239,6 +240,11 @@ main <- function() { stop("Error loading parameters_project.R: ", e$message) }) + # Define paths for mosaic detection (used in PHASE 1) + base_project_path <- file.path("laravel_app", "storage", "app", project_dir) + weekly_tile_max <- file.path(base_project_path, "weekly_tile_max") + weekly_mosaic <- file.path(base_project_path, "weekly_mosaic") + tryCatch({ source(here("r_app", "30_growth_model_utils.R")) }, error = function(e) { diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 5890a94..d366f41 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -16,7 +16,34 @@ suppressPackageStartupMessages({ library(jsonlite) # For reading tiling_config.json }) -# 2. Smart detection for tile-based vs single-file mosaic approach +# 2. Client type mapping (for conditional script execution) +# --------------------------------------------------------- +# Maps project names to client types for pipeline control +# Client types: +# - "cane_supply": Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output) +# - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report) +# - "extension_service": (Future - not yet implemented) +# +# NOTE: This will eventually migrate to Laravel environment variables/database +# For now, maintain this mapping and update as projects are added +CLIENT_TYPE_MAP <- list( + "angata" = "cane_supply", + "aura" = "agronomic_support", + "chemba" = "cane_supply", + "xinavane" = "cane_supply", + "esa" = "cane_supply" +) + +get_client_type <- function(project_name) { + client_type <- CLIENT_TYPE_MAP[[project_name]] + if (is.null(client_type)) { + warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name)) + return("cane_supply") + } + return(client_type) +} + +# 3. 
Smart detection for tile-based vs single-file mosaic approach # ---------------------------------------------------------------- detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { # PRIORITY 1: Check for tiling_config.json metadata file from script 10 @@ -112,7 +139,7 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL )) } -# 2. Define project directory structure +# 4. Define project directory structure # ----------------------------------- setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") { # Base directories @@ -185,7 +212,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b" } #set working dir. -# 3. Load field boundaries +# 5. Load field boundaries # ---------------------- load_field_boundaries <- function(data_dir) { # Choose field boundaries file based on project and script type @@ -279,7 +306,7 @@ load_field_boundaries <- function(data_dir) { }) } -# 4. Load harvesting data +# 6. Load harvesting data # --------------------- load_harvesting_data <- function(data_dir) { harvest_file <- here(data_dir, "harvest.xlsx") @@ -370,7 +397,7 @@ log_head <- function(list, level = "INFO") { log_message(paste(capture.output(str(head(list))), collapse = "\n"), level) } -# 6. Set up full logging system with file output +# 8. Set up full logging system with file output # ------------------------------------------- setup_logging <- function(log_dir) { log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log")) @@ -402,7 +429,7 @@ setup_logging <- function(log_dir) { )) } -# 7. Initialize the project +# 9. 
Initialize the project # ---------------------- # Export project directories and settings initialize_project <- function(project_dir, data_source = "merged_tif_8b") { diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index 0e1cc0a..50bc56c 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -30,19 +30,48 @@ # ============================================================================== # *** EDIT THESE VARIABLES *** -end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date() -offset <- 7 # days to look back -project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba" +end_date <- as.Date("2025-12-31") # or specify: as.Date("2026-01-27") , Sys.Date() +offset <- 7 # days to look back +project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba" data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist # *************************** +# Load client type mapping from parameters_project.R +source("r_app/parameters_project.R") +client_type <- get_client_type(project_dir) +cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type)) + # Format dates end_date_str <- format(as.Date(end_date), "%Y-%m-%d") # Track success of pipeline pipeline_success <- TRUE +# Define conditional script execution based on client type +# Client types: +# - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output) +# - "agronomic_support": Runs Scripts 20,30,80,90 only (KPI calculation + Word report) +# +# Scripts that ALWAYS run (regardless of client type): +# - 00: Python Download +# - 10: Tiling (if outputs don't exist) +# - 20: CI Extraction +# - 30: Growth Model +# - 40: Mosaic Creation +# - 80: KPI Calculation +# +# Scripts that are client-type specific: +# - 21: CI RDS→CSV (cane_supply only) +# - 22: (cane_supply only) +# - 23: (cane_supply only) +# - 31: 
Harvest Imminent (cane_supply only) +# - 90: Legacy Word Report (agronomic_support only) +# - 91: Modern Excel Report (cane_supply only) +skip_cane_supply_only <- (client_type != "cane_supply") # Skip Scripts 21,22,23,31 for non-cane_supply +run_legacy_report <- (client_type == "agronomic_support") # Script 90 for agronomic support +run_modern_report <- (client_type == "cane_supply") # Script 91 for cane supply + # ============================================================================== # INTELLIGENT CHECKING: What has already been completed? # ============================================================================== @@ -75,16 +104,21 @@ detect_mosaic_mode_simple <- function(project_dir) { mosaic_mode <- detect_mosaic_mode_simple(project_dir) cat(sprintf("Auto-detected mosaic mode: %s\n", mosaic_mode)) -# Check Script 10 outputs - look for daily_tiles_split/{GRID_SIZE} (flexible grid detection) +# Check Script 10 outputs - FLEXIBLE: look for tiles either directly OR in grid subdirs tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split") tiles_dates <- c() if (dir.exists(tiles_split_base)) { - # Look for any grid-size subdirectories (5x5, 10x10, etc.) + # Try grid-size subdirectories first (5x5, 10x10, etc.) 
- preferred new structure subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + if (length(grid_patterns) > 0) { + # New structure: daily_tiles_split/{grid_size}/{dates}/ grid_dir <- file.path(tiles_split_base, grid_patterns[1]) tiles_dates <- list.dirs(grid_dir, full.names = FALSE, recursive = FALSE) + } else { + # Old structure: daily_tiles_split/{dates}/ (no grid-size subfolder) + tiles_dates <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) } } cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates))) @@ -103,23 +137,28 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files))) # For now, just note that CSV is time-dependent, not a good skip indicator cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n") -# Check Script 40 outputs (mosaics) - flexible detection for both tile-based and single-file +# Check Script 40 outputs (mosaics) - check for THIS WEEK's mosaic specifically +# (important for Script 80, which needs the current week's mosaic) +current_week <- as.numeric(format(end_date, "%V")) +current_year <- as.numeric(format(end_date, "%Y")) +week_mosaic_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) + mosaic_files <- c() if (mosaic_mode == "tiled") { - # For tile-based: look in weekly_tile_max/{grid_size}/ + # For tile-based: look in weekly_tile_max/{grid_size}/ for this week's file weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) if (length(grid_patterns) > 0) { mosaic_dir <- file.path(weekly_tile_max, grid_patterns[1]) - mosaic_files <- list.files(mosaic_dir, pattern = "\\.tif$") + mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern) } } else if (mosaic_mode 
== "single-file") { - # For single-file: look in weekly_mosaic/ + # For single-file: look in weekly_mosaic/ for this week's file mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") - mosaic_files <- list.files(mosaic_dir, pattern = "^week_.*\\.tif$") + mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern) } -cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files))) +cat(sprintf("Script 40: %d mosaic files exist for week %02d\n", length(mosaic_files), current_week)) # Check Script 80 outputs (KPIs in reports/kpis/field_stats) kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") @@ -130,19 +169,29 @@ kpi_files <- if (dir.exists(kpi_dir)) { } cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files))) -# Determine if scripts should run based on outputs -skip_10 <- length(tiles_dates) > 0 && !force_rerun -skip_20 <- length(ci_files) > 0 && !force_rerun -skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped -skip_40 <- length(mosaic_files) > 0 && !force_rerun -skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks +# Determine if scripts should run based on outputs AND client type +skip_10 <- (length(tiles_dates) > 0 && !force_rerun) # Always check tiles +skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data +skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion) +skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients +skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients +skip_30 <- FALSE # Script 30 ALWAYS runs for all client types +skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients +skip_40 <- (length(mosaic_files) > 0 && !force_rerun) # Always check mosaics +skip_80 <- FALSE # Script 80 ALWAYS runs for all 
client types - calculates KPIs for current week -cat("\nSkipping decisions:\n") -cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP (tiles exist)" else "RUN")) -cat(sprintf(" Script 20: %s\n", if(skip_20) "SKIP (CI exists)" else "RUN")) -cat(sprintf(" Script 21: %s\n", if(skip_21) "SKIP (CI exists)" else "RUN")) -cat(sprintf(" Script 40: %s\n", if(skip_40) "SKIP (mosaics exist)" else "RUN")) -cat(sprintf(" Script 80: %s\n", if(skip_80) "SKIP (KPIs exist)" else "RUN")) +cat("\nSkipping decisions (based on outputs AND client type):\n") +cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP" else "RUN")) +cat(sprintf(" Script 20: RUN (always runs to process new downloads)\n")) +cat(sprintf(" Script 21: %s %s\n", if(skip_21) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 22: %s %s\n", if(skip_22) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 23: %s %s\n", if(skip_23) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 30: %s (always runs)\n", if(skip_30) "SKIP" else "RUN")) +cat(sprintf(" Script 31: %s %s\n", if(skip_31) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 40: %s %s\n", if(skip_40) "SKIP" else "RUN", if(!skip_40) "" else "(mosaics exist)")) +cat(sprintf(" Script 80: %s (always runs)\n", if(skip_80) "SKIP" else "RUN")) +cat(sprintf(" Script 90: %s %s\n", if(!run_legacy_report) "SKIP" else "RUN", if(run_legacy_report) "(agronomic_support legacy report)" else "")) +cat(sprintf(" Script 91: %s %s\n", if(!run_modern_report) "SKIP" else "RUN", if(run_modern_report) "(cane_supply modern report)" else "")) # ============================================================================== # PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY) # ============================================================================== @@ -157,18 +206,28 @@ tryCatch({ existing_tiff_files <- list.files(merged_tifs_dir, pattern = 
"^\\d{4}-\\d{2}-\\d{2}\\.tif$") existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) - # Get existing dates from tiles (better indicator of completion) + # Get existing dates from tiles (better indicator of completion for tiled projects) existing_tile_dates <- tiles_dates + # For single-file projects, use raw TIFF files as the indicator instead + # This prevents re-downloading data that already exists + if (mosaic_mode == "single-file" && length(existing_tiff_dates) > 0) { + existing_tile_dates <- existing_tiff_dates + } + # Find missing dates in the window start_date <- end_date - offset date_seq <- seq(start_date, end_date, by = "day") target_dates <- format(date_seq, "%Y-%m-%d") - # Only download if tiles don't exist yet (more reliable than checking raw TIFFs) + # Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file) missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)] - cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) + if (mosaic_mode == "single-file") { + cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates))) + } else { + cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) + } cat(sprintf(" Missing dates in window: %d\n", length(missing_dates))) # Download each missing date @@ -217,6 +276,12 @@ tryCatch({ if (pipeline_success && !skip_10) { cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n") tryCatch({ + # CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.) 
+ saved_end_date <- end_date + saved_offset <- offset + saved_project_dir <- project_dir + saved_data_source <- data_source + # Set environment variables for the script (Script 10 uses these for filtering) assign("PROJECT", project_dir, envir = .GlobalEnv) @@ -225,6 +290,12 @@ if (pipeline_success && !skip_10) { source("r_app/10_create_master_grid_and_split_tiffs.R") sink() + # CRITICAL: Restore global variables after sourcing Script 10 + end_date <- saved_end_date + offset <- saved_offset + project_dir <- saved_project_dir + data_source <- saved_data_source + # Verify output tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") if (dir.exists(tiles_dir)) { @@ -248,14 +319,15 @@ if (pipeline_success && !skip_10) { if (pipeline_success && !skip_20) { cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n") tryCatch({ - # Set environment variables for the script - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) - assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) + # Run Script 20 via system() to pass command-line args just like from terminal + # Arguments: end_date offset project_dir data_source + cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"', + format(end_date, "%Y-%m-%d"), offset, project_dir, data_source) + result <- system(cmd) - source("r_app/20_ci_extraction.R") - main() # Call main() to execute the script with the environment variables + if (result != 0) { + stop("Script 20 exited with error code:", result) + } # Verify CI output was created ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") @@ -306,17 +378,18 @@ if (pipeline_success && !skip_21) { # ============================================================================== # SCRIPT 30: INTERPOLATE GROWTH MODEL 
# ============================================================================== -if (pipeline_success) { +if (pipeline_success && !skip_30) { cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") tryCatch({ - # Set environment variables for the script - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) - assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) + # Run Script 30 via system() to pass command-line args just like from terminal + # Script 30 expects: project_dir as first argument only + cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s"', + project_dir) + result <- system(cmd) - source("r_app/30_interpolate_growth_model.R") - main() # Call main() to execute the script with the environment variables + if (result != 0) { + stop("Script 30 exited with error code:", result) + } # Verify interpolated output growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") @@ -335,7 +408,7 @@ if (pipeline_success) { # ============================================================================== # PYTHON 31: HARVEST IMMINENT WEEKLY # ============================================================================== -if (pipeline_success) { +if (pipeline_success && !skip_31) { cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n") tryCatch({ # Run Python script in pytorch_gpu conda environment @@ -364,6 +437,8 @@ if (pipeline_success) { setwd(original_dir) cat("⚠ Script 31 error:", e$message, "\n") }) +} else if (skip_31) { + cat("\n========== SKIPPING SCRIPT 31 (non-cane_supply client type) ==========\n") } # ============================================================================== @@ -372,20 +447,41 @@ if (pipeline_success) { if (pipeline_success && !skip_40) { cat("\n========== RUNNING SCRIPT 40: MOSAIC 
CREATION ==========\n") tryCatch({ - # Set environment variables for the script - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) - assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) + # Run Script 40 via system() to pass command-line args just like from terminal + # Use full path and --vanilla to avoid renv/environment issues + # Arguments: end_date offset project_dir (file_name_tif is auto-generated from dates) + cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" %d "%s"', + format(end_date, "%Y-%m-%d"), offset, project_dir) + result <- system(cmd) - source("r_app/40_mosaic_creation.R") - main() # Call main() to execute the script with the environment variables + if (result != 0) { + stop("Script 40 exited with error code:", result) + } - # Verify mosaic output - mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") - if (dir.exists(mosaic_dir)) { - files <- list.files(mosaic_dir, pattern = "\\.tif$") - cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", length(files))) + # Verify mosaic output - check based on mosaic mode (tiled vs single-file) + mosaic_files_check <- c() + if (mosaic_mode == "tiled") { + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") + if (dir.exists(mosaic_dir)) { + # Check for current week's file only + current_week_check <- as.numeric(format(end_date, "%V")) + current_year_check <- as.numeric(format(end_date, "%Y")) + week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check) + mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check) + } + } else { + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(mosaic_dir)) { + # Check for current week's file only + 
current_week_check <- as.numeric(format(end_date, "%V"))
+      current_year_check <- as.numeric(format(end_date, "%Y"))
+      week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
+      mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
+    }
+  }
+
+  if (length(mosaic_files_check) > 0) {
+    cat(sprintf("✓ Script 40 completed - created mosaic for week %02d\n", current_week_check))
     } else {
       cat("✓ Script 40 completed\n")
     }
@@ -403,16 +499,15 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week
   cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
   tryCatch({
-    # Set environment variables for the script (Script 80's main() uses these as fallbacks)
-    # NOTE: end_date is already a Date, just assign directly without as.Date()
-    assign("end_date", end_date, envir = .GlobalEnv)
-    assign("end_date_str", end_date_str, envir = .GlobalEnv)
-    assign("offset", offset, envir = .GlobalEnv)
-    assign("project_dir", project_dir, envir = .GlobalEnv)
-    assign("data_source", data_source, envir = .GlobalEnv)
+    # Run Script 80 via system() to pass command-line args just like from terminal
+    # Use full path and --vanilla to avoid renv/environment issues
+    cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" %d "%s" "%s"',
+                   format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
+    result <- system(cmd)
-    source("r_app/80_calculate_kpis.R")
-    main()  # Call main() to execute the script with the environment variables
+    if (result != 0) {
+      stop("Script 80 exited with error code:", result)
+    }

     # Verify KPI output
     kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
@@ -424,8 +519,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
     }
   }, error = function(e) {
     cat("✗ Error in Script 80:", e$message, "\n")
-    cat("Full error:\n")
-    print(e)
     pipeline_success <<- FALSE
   })
 }