Adjust scripts to work for all projects

This commit is contained in:
Timon 2026-01-29 09:04:46 +01:00
parent b2de819fc4
commit dd915f9b9e
7 changed files with 385 additions and 115 deletions

View file

@ -89,20 +89,118 @@ def setup_config():
config.sh_client_id = os.environ.get('SH_CLIENT_ID', '1a72d811-4f0e-4447-8282-df09608cff44')
config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos')
# BYOC collection for Planet 8-band data
collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'
byoc = DataCollection.define_byoc(collection_id, name='planet_data_8b', is_timeless=True)
catalog = SentinelHubCatalog(config=config)
return config, byoc, catalog
return config, catalog
def _first_date_with_data(catalog, byoc, bbox, date_range) -> Optional[str]:
    """Return the first date in date_range with catalog results for byoc, else None.

    Probes one date at a time; per-date errors are printed and swallowed so a
    transient API failure on one date does not abort detection.
    """
    for test_date in date_range:
        try:
            search = catalog.search(
                collection=byoc,
                bbox=bbox,
                time=(test_date, test_date),
                filter=None
            )
            tiles = list(search)
            if len(tiles) > 0:
                print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
                return test_date
        except Exception as e:
            print(f" ⚠️ {test_date}: {str(e)[:60]}")
    return None


def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple:
    """
    Auto-detect which Planet collection is available for this project.

    Checks a week of dates (backwards from date_str) to ensure robust detection.
    If ANY date has data in the new 8-band collection, use that.
    If no dates have data in new collection, fall back to legacy 4-band.
    If neither collection has data, default to the 8-band collection so a
    download attempt can still be made.

    Args:
        date_str: Reference date (YYYY-MM-DD)
        bbox_list: List of bounding boxes for testing (only the first is probed)
        catalog: SentinelHubCatalog instance
        date_range_days: Number of days to check backwards (default: 7)

    Returns:
        (byoc, collection_info_dict) where byoc is DataCollection and dict contains
        collection_id, name, bands, output_folder and singles_folder metadata.
    """
    new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'  # 8-band (new)
    old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c'  # 4-band (legacy)
    test_bbox = bbox_list[0]

    # Generate date range (backwards from date_str); fall back to today on bad input.
    try:
        ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        print(f"⚠️ Invalid date format: {date_str}. Using today.")
        ref_date = datetime.datetime.now()
    date_range = [
        (ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d')
        for i in range(date_range_days)
    ]
    print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...")
    print(f" Test range: {date_range[-1]} to {date_range[0]}")

    # Metadata returned alongside the chosen DataCollection. NOTE(review):
    # 'bands': 4 for the "8-band" collection matches the 4-band evalscript
    # output used downstream — confirm this is intentional before changing.
    new_info = {
        'collection_id': new_id,
        'name': 'planet_data_8b',
        'bands': 4,
        'output_folder': 'merged_tif_8b',
        'singles_folder': 'single_images_8b'
    }
    old_info = {
        'collection_id': old_id,
        'name': 'planet_data',
        'bands': 4,
        'output_folder': 'merged_tif',
        'singles_folder': 'single_images'
    }

    # Try new collection first.
    print(f"\n Trying 8-band collection: {new_id}")
    byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True)
    if _first_date_with_data(catalog, byoc_new, test_bbox, date_range) is not None:
        print(f" ✓ Using 8-band collection")
        return byoc_new, new_info

    # No data in new collection, try legacy.
    print(f"\n ✗ No data found in 8-band collection")
    print(f" Trying legacy 4-band collection: {old_id}")
    byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True)
    if _first_date_with_data(catalog, byoc_old, test_bbox, date_range) is not None:
        print(f" ✓ Using legacy 4-band collection")
        return byoc_old, old_info

    # Neither collection has data: default to the 8-band collection.
    print(f"\n ⚠️ No data found in either collection for {date_range_days} days")
    print(f" Defaulting to 8-band collection (will attempt download anyway)")
    return byoc_new, new_info
# ============================================================================
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
# ============================================================================
EVALSCRIPT_4BAND_MASKED = """
EVALSCRIPT_8BAND = """
//VERSION=3
function setup() {
return {
@ -117,9 +215,35 @@ EVALSCRIPT_4BAND_MASKED = """
}
function evaluatePixel(sample) {
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
// This reduces output pixels and avoids NaN interpolation on client side
if (sample.udm1 == 0) {
// Scale reflectance: DN [0, 1] range
var scaledRed = 2.5 * sample.red / 10000;
var scaledGreen = 2.5 * sample.green / 10000;
var scaledBlue = 2.5 * sample.blue / 10000;
var scaledNIR = 2.5 * sample.nir / 10000;
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
} else {
return [NaN, NaN, NaN, NaN];
}
}
"""
EVALSCRIPT_4BAND_LEGACY = """
//VERSION=3
function setup() {
return {
input: [{
bands: ["red", "green", "blue", "nir", "udm1"],
units: "DN"
}],
output: {
bands: 4
}
};
}
function evaluatePixel(sample) {
// Cloud masking for legacy collection (same band names as new 8-band)
// udm1 = 0 means clear, non-zero means cloud/shadow/etc
if (sample.udm1 == 0) {
var scaledRed = 2.5 * sample.red / 10000;
var scaledGreen = 2.5 * sample.green / 10000;
var scaledBlue = 2.5 * sample.blue / 10000;
@ -289,6 +413,7 @@ def download_tile(
output_dir: Path,
config,
byoc,
evalscript: str,
resolution: int = 3
) -> bool:
"""Download a single full tile (no geometry masking = lower PU) with exponential backoff."""
@ -300,9 +425,9 @@ def download_tile(
try:
size = bbox_to_dimensions(bbox, resolution=resolution)
# Create download request with 4-band cloud-masked evalscript (uint16)
# Create download request with appropriate evalscript for collection
request = SentinelHubRequest(
evalscript=EVALSCRIPT_4BAND_MASKED,
evalscript=evalscript,
input_data=[
SentinelHubRequest.input_data(
data_collection=byoc,
@ -350,6 +475,8 @@ def download_date(
base_path: Path,
config,
byoc,
evalscript: str,
collection_info: dict,
resolution: int = 3
) -> int:
"""
@ -357,14 +484,14 @@ def download_date(
Returns number of successfully downloaded tiles.
"""
output_dir = base_path / 'single_images_8b' / date_str
output_dir = base_path / collection_info['singles_folder'] / date_str
output_dir.mkdir(parents=True, exist_ok=True)
print(f"\nDownloading {len(bbox_list)} tiles for {date_str}...")
successful = 0
for idx, bbox in enumerate(bbox_list, 1):
if download_tile(date_str, bbox, output_dir, config, byoc, resolution):
if download_tile(date_str, bbox, output_dir, config, byoc, evalscript, resolution):
successful += 1
percentage = (idx / len(bbox_list)) * 100
@ -385,10 +512,10 @@ def download_date(
# MERGE FUNCTION
# ============================================================================
def merge_tiles(date_str: str, base_path: Path) -> bool:
def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool:
"""Merge downloaded tiles into single GeoTIFF using GDAL."""
single_images_dir = base_path / 'single_images_8b' / date_str
single_images_dir = base_path / collection_info['singles_folder'] / date_str
# Find all response.tiff files
file_list = [str(p) for p in single_images_dir.rglob('response.tiff')]
@ -397,8 +524,8 @@ def merge_tiles(date_str: str, base_path: Path) -> bool:
print(f" ✗ No tiles found to merge")
return False
merged_tif_dir = base_path / 'merged_tif_8b'
merged_vrt_dir = base_path / 'merged_virtual_8b'
merged_tif_dir = base_path / collection_info['output_folder']
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
merged_tif_dir.mkdir(parents=True, exist_ok=True)
merged_vrt_dir.mkdir(parents=True, exist_ok=True)
@ -453,9 +580,9 @@ def main():
# Parse arguments
parser = argparse.ArgumentParser(
description='Download Planet 8-band imagery with PU optimization'
description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)'
)
parser.add_argument('project', help='Project name (angata, chemba, xinavane, etc.)')
parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)')
parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
@ -481,7 +608,7 @@ def main():
date_str = datetime.date.today().strftime('%Y-%m-%d')
print(f"{'='*70}")
print(f"Planet 8-Band Download - PU Optimized")
print(f"Planet Download - Auto-Detecting Collection (PU Optimized)")
print(f"{'='*70}")
print(f"Project: {args.project}")
print(f"Date: {date_str}")
@ -489,7 +616,7 @@ def main():
# Setup SentinelHub
print(f"\nSetting up SentinelHub...")
config, byoc, catalog = setup_config()
config, catalog = setup_config()
print(f"✓ SentinelHub configured")
# Load geometries
@ -504,15 +631,26 @@ def main():
print(f"\n✗ No tiles intersect field geometries. Exiting.")
sys.exit(1)
# Auto-detect collection and get evalscript
byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)
# Get appropriate evalscript
evalscript = EVALSCRIPT_8BAND if collection_info['bands'] == 4 and 'new' not in collection_info.get('note', '') else EVALSCRIPT_8BAND
if '4e56d0cb' not in collection_info['collection_id']:
evalscript = EVALSCRIPT_4BAND_LEGACY
print(f"\n Collection: {collection_info['name']}")
print(f" Output folder: {collection_info['output_folder']}/")
# Check date availability
print(f"\nChecking data availability...")
print(f"\nChecking data availability for {date_str}...")
if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
sys.exit(0)
# Download tiles
print(f"\n{'='*70}")
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, args.resolution)
downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)
if downloaded == 0:
print(f"\n✗ No tiles downloaded. Exiting.")
@ -522,20 +660,20 @@ def main():
if not args.skip_merge:
print(f"\n{'='*70}")
print(f"Merging tiles...")
if merge_tiles(date_str, base_path):
if merge_tiles(date_str, base_path, collection_info):
print(f"✓ Merge complete")
# Cleanup intermediate files
if args.cleanup:
print(f"\nCleaning up intermediate files...")
import shutil
single_images_dir = base_path / 'single_images_8b' / date_str
merged_vrt_dir = base_path / 'merged_virtual_8b'
single_images_dir = base_path / collection_info['singles_folder'] / date_str
merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"
try:
if single_images_dir.exists():
shutil.rmtree(single_images_dir)
print(f" ✓ Deleted {single_images_dir.name}/{date_str}")
print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}")
# Clean old VRT files
for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
@ -549,7 +687,7 @@ def main():
print(f"\n{'='*70}")
print(f"✓ Done!")
print(f"Output: {base_path / 'merged_tif_8b' / f'{date_str}.tif'}")
print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}")
print(f"{'='*70}")

View file

@ -41,8 +41,9 @@ main <- function() {
args <- commandArgs(trailingOnly = TRUE)
# Process end_date argument
if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1])
if (length(args) >= 1 && !is.na(args[1]) && args[1] != "") {
# Parse date explicitly in YYYY-MM-DD format from command line
end_date <- as.Date(args[1], format = "%Y-%m-%d")
if (is.na(end_date)) {
warning("Invalid end_date provided. Using default (current date).")
end_date <- Sys.Date()

View file

@ -50,7 +50,8 @@ main <- function() {
# Process end_date argument with default
if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1])
# Parse date explicitly in YYYY-MM-DD format from command line
end_date <- as.Date(args[1], format = "%Y-%m-%d")
if (is.na(end_date)) {
message("Invalid end_date provided. Using current date.")
end_date <- Sys.Date()
@ -96,18 +97,18 @@ main <- function() {
assign("data_source", data_source, envir = .GlobalEnv)
tryCatch({
source("parameters_project.R")
source("40_mosaic_creation_utils.R")
safe_log(paste("Successfully sourced files from default directory."))
source("r_app/parameters_project.R")
source("r_app/40_mosaic_creation_utils.R")
safe_log(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) {
message("Note: Could not open files from default directory (expected on some systems)")
message("Attempting to source from 'r_app' directory instead...")
message("Note: Could not open files from r_app directory")
message("Attempting to source from default directory instead...")
tryCatch({
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "40_mosaic_creation_utils.R"))
message("✓ Successfully sourced files from 'r_app' directory")
source("parameters_project.R")
source("40_mosaic_creation_utils.R")
message("✓ Successfully sourced files from default directory")
}, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.")
stop("Failed to source required files from both 'r_app' and default directories.")
})
})

View file

@ -253,7 +253,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda
missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
aggregated_results[[tif_idx]] <- data.frame(
filename = tif_file,
filename = basename(tif_file),
notNA = total_notna,
total_pixels = total_pixels,
missing_pixels_percentage = missing_pct,
@ -265,7 +265,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda
}, error = function(e) {
safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING")
aggregated_results[[tif_idx]] <<- data.frame(
filename = tif_file,
filename = basename(tif_file),
notNA = NA_real_,
total_pixels = NA_real_,
missing_pixels_percentage = 100,
@ -543,8 +543,12 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS
# Create output directory if it doesn't exist
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Create full file path
file_path <- here::here(output_dir, file_name)
# Create full file path - use file.path() since output_dir may be absolute path
# Ensure file_name has .tif extension
if (!grepl("\\.tif$|\\.TIF$", file_name)) {
file_name <- paste0(file_name, ".tif")
}
file_path <- file.path(output_dir, file_name)
# Get cloud mask if it exists
cloud_mask <- attr(mosaic_raster, "cloud_mask")

View file

@ -181,7 +181,8 @@ main <- function() {
# end_date (arg 1)
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
# Parse date explicitly in YYYY-MM-DD format from command line
as.Date(args[1], format = "%Y-%m-%d")
} else if (exists("end_date", envir = .GlobalEnv)) {
global_date <- get("end_date", envir = .GlobalEnv)
# Check if it's a valid Date with length > 0
@ -239,6 +240,11 @@ main <- function() {
stop("Error loading parameters_project.R: ", e$message)
})
# Define paths for mosaic detection (used in PHASE 1)
base_project_path <- file.path("laravel_app", "storage", "app", project_dir)
weekly_tile_max <- file.path(base_project_path, "weekly_tile_max")
weekly_mosaic <- file.path(base_project_path, "weekly_mosaic")
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
}, error = function(e) {

View file

@ -16,7 +16,34 @@ suppressPackageStartupMessages({
library(jsonlite) # For reading tiling_config.json
})
# 2. Smart detection for tile-based vs single-file mosaic approach
# 2. Client type mapping (for conditional script execution)
# ---------------------------------------------------------
# Maps project names to client types for pipeline control
# Client types:
# - "cane_supply": Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output)
# - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report)
# - "extension_service": (Future - not yet implemented)
#
# NOTE: This will eventually migrate to Laravel environment variables/database
# For now, maintain this mapping and update as projects are added
# Project-name → client-type lookup used to decide which pipeline scripts run.
# Unknown projects fall back to the full "cane_supply" pipeline (with a warning).
CLIENT_TYPE_MAP <- list(
  angata   = "cane_supply",
  aura     = "agronomic_support",
  chemba   = "cane_supply",
  xinavane = "cane_supply",
  esa      = "cane_supply"
)

# Resolve a project's client type from CLIENT_TYPE_MAP.
# Returns "cane_supply" (and warns) when the project is not mapped.
get_client_type <- function(project_name) {
  mapped_type <- CLIENT_TYPE_MAP[[project_name]]
  if (!is.null(mapped_type)) {
    return(mapped_type)
  }
  warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name))
  "cane_supply"
}
# 3. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
# PRIORITY 1: Check for tiling_config.json metadata file from script 10
@ -112,7 +139,7 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL
))
}
# 2. Define project directory structure
# 4. Define project directory structure
# -----------------------------------
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
# Base directories
@ -185,7 +212,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
}
#set working dir.
# 3. Load field boundaries
# 5. Load field boundaries
# ----------------------
load_field_boundaries <- function(data_dir) {
# Choose field boundaries file based on project and script type
@ -279,7 +306,7 @@ load_field_boundaries <- function(data_dir) {
})
}
# 4. Load harvesting data
# 6. Load harvesting data
# ---------------------
load_harvesting_data <- function(data_dir) {
harvest_file <- here(data_dir, "harvest.xlsx")
@ -370,7 +397,7 @@ log_head <- function(list, level = "INFO") {
log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
}
# 6. Set up full logging system with file output
# 8. Set up full logging system with file output
# -------------------------------------------
setup_logging <- function(log_dir) {
log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))
@ -402,7 +429,7 @@ setup_logging <- function(log_dir) {
))
}
# 7. Initialize the project
# 9. Initialize the project
# ----------------------
# Export project directories and settings
initialize_project <- function(project_dir, data_source = "merged_tif_8b") {

View file

@ -30,19 +30,48 @@
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
end_date <- as.Date("2025-12-31") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba"
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
# ***************************
# Load client type mapping from parameters_project.R
source("r_app/parameters_project.R")
client_type <- get_client_type(project_dir)
cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type))
# Format dates
end_date_str <- format(as.Date(end_date), "%Y-%m-%d")
# Track success of pipeline
pipeline_success <- TRUE
# Define conditional script execution based on client type
# Client types:
# - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output)
# - "agronomic_support": Runs Scripts 20,30,80,90 only (KPI calculation + Word report)
#
# Scripts that ALWAYS run (regardless of client type):
# - 00: Python Download
# - 10: Tiling (if outputs don't exist)
# - 20: CI Extraction
# - 30: Growth Model
# - 40: Mosaic Creation
# - 80: KPI Calculation
#
# Scripts that are client-type specific:
# - 21: CI RDS→CSV (cane_supply only)
# - 22: (cane_supply only)
# - 23: (cane_supply only)
# - 31: Harvest Imminent (cane_supply only)
# - 90: Legacy Word Report (agronomic_support only)
# - 91: Modern Excel Report (cane_supply only)
skip_cane_supply_only <- (client_type != "cane_supply") # Skip Scripts 21,22,23,31 for non-cane_supply
run_legacy_report <- (client_type == "agronomic_support") # Script 90 for agronomic support
run_modern_report <- (client_type == "cane_supply") # Script 91 for cane supply
# ==============================================================================
# INTELLIGENT CHECKING: What has already been completed?
# ==============================================================================
@ -75,16 +104,21 @@ detect_mosaic_mode_simple <- function(project_dir) {
mosaic_mode <- detect_mosaic_mode_simple(project_dir)
cat(sprintf("Auto-detected mosaic mode: %s\n", mosaic_mode))
# Check Script 10 outputs - look for daily_tiles_split/{GRID_SIZE} (flexible grid detection)
# Check Script 10 outputs - FLEXIBLE: look for tiles either directly OR in grid subdirs
tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")
tiles_dates <- c()
if (dir.exists(tiles_split_base)) {
# Look for any grid-size subdirectories (5x5, 10x10, etc.)
# Try grid-size subdirectories first (5x5, 10x10, etc.) - preferred new structure
subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
# New structure: daily_tiles_split/{grid_size}/{dates}/
grid_dir <- file.path(tiles_split_base, grid_patterns[1])
tiles_dates <- list.dirs(grid_dir, full.names = FALSE, recursive = FALSE)
} else {
# Old structure: daily_tiles_split/{dates}/ (no grid-size subfolder)
tiles_dates <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
}
}
cat(sprintf("Script 10: %d dates already tiled\n", length(tiles_dates)))
@ -103,23 +137,28 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
# For now, just note that CSV is time-dependent, not a good skip indicator
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
# Check Script 40 outputs (mosaics) - flexible detection for both tile-based and single-file
# Check Script 40 outputs (mosaics) - check for THIS WEEK's mosaic specifically
# (important for Script 80, which needs the current week's mosaic)
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%Y"))
week_mosaic_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
mosaic_files <- c()
if (mosaic_mode == "tiled") {
# For tile-based: look in weekly_tile_max/{grid_size}/
# For tile-based: look in weekly_tile_max/{grid_size}/ for this week's file
weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
mosaic_dir <- file.path(weekly_tile_max, grid_patterns[1])
mosaic_files <- list.files(mosaic_dir, pattern = "\\.tif$")
mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern)
}
} else if (mosaic_mode == "single-file") {
# For single-file: look in weekly_mosaic/
# For single-file: look in weekly_mosaic/ for this week's file
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
mosaic_files <- list.files(mosaic_dir, pattern = "^week_.*\\.tif$")
mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern)
}
cat(sprintf("Script 40: %d mosaic files exist\n", length(mosaic_files)))
cat(sprintf("Script 40: %d mosaic files exist for week %02d\n", length(mosaic_files), current_week))
# Check Script 80 outputs (KPIs in reports/kpis/field_stats)
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
@ -130,19 +169,29 @@ kpi_files <- if (dir.exists(kpi_dir)) {
}
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
# Determine if scripts should run based on outputs
skip_10 <- length(tiles_dates) > 0 && !force_rerun
skip_20 <- length(ci_files) > 0 && !force_rerun
skip_21 <- length(ci_files) > 0 && !force_rerun # Skip 21 if 20 is skipped
skip_40 <- length(mosaic_files) > 0 && !force_rerun
skip_80 <- FALSE # Always run Script 80 - it calculates KPIs for the current week (end_date), not historical weeks
# Determine if scripts should run based on outputs AND client type
skip_10 <- (length(tiles_dates) > 0 && !force_rerun) # Always check tiles
skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data
skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion)
skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients
skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients
skip_30 <- FALSE # Script 30 ALWAYS runs for all client types
skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients
skip_40 <- (length(mosaic_files) > 0 && !force_rerun) # Always check mosaics
skip_80 <- FALSE # Script 80 ALWAYS runs for all client types - calculates KPIs for current week
cat("\nSkipping decisions:\n")
cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP (tiles exist)" else "RUN"))
cat(sprintf(" Script 20: %s\n", if(skip_20) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 21: %s\n", if(skip_21) "SKIP (CI exists)" else "RUN"))
cat(sprintf(" Script 40: %s\n", if(skip_40) "SKIP (mosaics exist)" else "RUN"))
cat(sprintf(" Script 80: %s\n", if(skip_80) "SKIP (KPIs exist)" else "RUN"))
cat("\nSkipping decisions (based on outputs AND client type):\n")
cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP" else "RUN"))
cat(sprintf(" Script 20: RUN (always runs to process new downloads)\n"))
cat(sprintf(" Script 21: %s %s\n", if(skip_21) "SKIP" else "RUN", if(skip_cane_supply_only && !skip_21) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 22: %s %s\n", if(skip_22) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 23: %s %s\n", if(skip_23) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 30: %s (always runs)\n", if(skip_30) "SKIP" else "RUN"))
cat(sprintf(" Script 31: %s %s\n", if(skip_31) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 40: %s %s\n", if(skip_40) "SKIP" else "RUN", if(!skip_40) "" else "(mosaics exist)"))
cat(sprintf(" Script 80: %s (always runs)\n", if(skip_80) "SKIP" else "RUN"))
cat(sprintf(" Script 90: %s %s\n", if(!run_legacy_report) "SKIP" else "RUN", if(run_legacy_report) "(agronomic_support legacy report)" else ""))
cat(sprintf(" Script 91: %s %s\n", if(!run_modern_report) "SKIP" else "RUN", if(run_modern_report) "(cane_supply modern report)" else ""))
# ==============================================================================
# PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY)
@ -157,18 +206,28 @@ tryCatch({
existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$")
existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files)
# Get existing dates from tiles (better indicator of completion)
# Get existing dates from tiles (better indicator of completion for tiled projects)
existing_tile_dates <- tiles_dates
# For single-file projects, use raw TIFF files as the indicator instead
# This prevents re-downloading data that already exists
if (mosaic_mode == "single-file" && length(existing_tiff_dates) > 0) {
existing_tile_dates <- existing_tiff_dates
}
# Find missing dates in the window
start_date <- end_date - offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# Only download if tiles don't exist yet (more reliable than checking raw TIFFs)
# Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file)
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
if (mosaic_mode == "single-file") {
cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates)))
} else {
cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates)))
}
cat(sprintf(" Missing dates in window: %d\n", length(missing_dates)))
# Download each missing date
@ -217,6 +276,12 @@ tryCatch({
if (pipeline_success && !skip_10) {
cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n")
tryCatch({
# CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.)
saved_end_date <- end_date
saved_offset <- offset
saved_project_dir <- project_dir
saved_data_source <- data_source
# Set environment variables for the script (Script 10 uses these for filtering)
assign("PROJECT", project_dir, envir = .GlobalEnv)
@ -225,6 +290,12 @@ if (pipeline_success && !skip_10) {
source("r_app/10_create_master_grid_and_split_tiffs.R")
sink()
# CRITICAL: Restore global variables after sourcing Script 10
end_date <- saved_end_date
offset <- saved_offset
project_dir <- saved_project_dir
data_source <- saved_data_source
# Verify output
tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5")
if (dir.exists(tiles_dir)) {
@ -248,14 +319,15 @@ if (pipeline_success && !skip_10) {
if (pipeline_success && !skip_20) {
cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 20 via system() to pass command-line args just like from terminal
# Arguments: end_date offset project_dir data_source
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
result <- system(cmd)
source("r_app/20_ci_extraction.R")
main() # Call main() to execute the script with the environment variables
if (result != 0) {
stop("Script 20 exited with error code:", result)
}
# Verify CI output was created
ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
@ -306,17 +378,18 @@ if (pipeline_success && !skip_21) {
# ==============================================================================
# SCRIPT 30: INTERPOLATE GROWTH MODEL
# ==============================================================================
if (pipeline_success) {
if (pipeline_success && !skip_30) {
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
tryCatch({
# Set environment variables for the script
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 30 via system() to pass command-line args just like from terminal
# Script 30 expects: project_dir as first argument only
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s"',
project_dir)
result <- system(cmd)
source("r_app/30_interpolate_growth_model.R")
main() # Call main() to execute the script with the environment variables
if (result != 0) {
stop("Script 30 exited with error code:", result)
}
# Verify interpolated output
growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated")
@ -335,7 +408,7 @@ if (pipeline_success) {
# ==============================================================================
# PYTHON 31: HARVEST IMMINENT WEEKLY
# ==============================================================================
# NOTE(review): duplicated guard lines -- old unconditional form vs. new
# skip_31-aware form from a diff; only one should survive in the real file.
if (pipeline_success) {
if (pipeline_success && !skip_31) {
cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n")
tryCatch({
# Run Python script in pytorch_gpu conda environment
@ -364,6 +437,8 @@ if (pipeline_success) {
# Error handler tail for Script 31: restore the working directory that the
# tryCatch body changed (presumably via setwd earlier -- not visible here),
# then log the failure without stopping the pipeline (best-effort step).
setwd(original_dir)
cat("⚠ Script 31 error:", e$message, "\n")
})
} else if (skip_31) {
# skip_31 is set for non-cane_supply client types (per the message below).
cat("\n========== SKIPPING SCRIPT 31 (non-cane_supply client type) ==========\n")
}
# ==============================================================================
@ -372,20 +447,41 @@ if (pipeline_success) {
# Script 40: build the weekly mosaic for the project, then verify that the
# expected output file for the current ISO week exists.
if (pipeline_success && !skip_40) {
cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
tryCatch({
# Set environment variables for the script
# (globals act as fallbacks for the source()d script's main()).
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 40 via system() to pass command-line args just like from terminal
# Use full path and --vanilla to avoid renv/environment issues
# Arguments: end_date offset project_dir (file_name_tif is auto-generated from dates)
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" %d "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir)
result <- system(cmd)
# NOTE(review): system() above and source()/main() below both execute Script 40
# (double run) -- likely an unresolved old/new diff pair; keep only one.
source("r_app/40_mosaic_creation.R")
main()  # Call main() to execute the script with the environment variables
if (result != 0) {
stop("Script 40 exited with error code:", result)
}
# NOTE(review): the next five lines are the legacy verification block (count all
# .tif files in the tiled dir) and the block after them is its replacement
# (mode-aware, current-week-only check) -- another old/new diff pair.
# Verify mosaic output
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir)) {
files <- list.files(mosaic_dir, pattern = "\\.tif$")
cat(sprintf("✓ Script 40 completed - generated %d mosaic files\n", length(files)))
# Verify mosaic output - check based on mosaic mode (tiled vs single-file)
mosaic_files_check <- c()
if (mosaic_mode == "tiled") {
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir)) {
# Check for current week's file only
# ("%V" = ISO-8601 week number, "%Y" = 4-digit year)
current_week_check <- as.numeric(format(end_date, "%V"))
current_year_check <- as.numeric(format(end_date, "%Y"))
week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
}
} else {
# Single-file mode: same current-week pattern, different output directory.
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
if (dir.exists(mosaic_dir)) {
# Check for current week's file only
current_week_check <- as.numeric(format(end_date, "%V"))
current_year_check <- as.numeric(format(end_date, "%Y"))
week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
}
}
if (length(mosaic_files_check) > 0) {
# NOTE(review): `current_week` is not defined in this fragment (only
# `current_week_check` is) -- presumably set earlier in the script; if not,
# this line errors inside tryCatch. Verify, or use current_week_check here.
cat(sprintf("✓ Script 40 completed - created mosaic for week %02d\n", current_week))
} else {
cat("✓ Script 40 completed\n")
}
@ -403,16 +499,15 @@ if (pipeline_success && !skip_40) {
# Script 80: compute the weekly KPIs. Runs whenever the pipeline is still
# healthy -- there is deliberately no skip flag for this step.
if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week
cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
tryCatch({
# Set environment variables for the script (Script 80's main() uses these as fallbacks)
# NOTE: end_date is already a Date, just assign directly without as.Date()
assign("end_date", end_date, envir = .GlobalEnv)
assign("end_date_str", end_date_str, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv)
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Run Script 80 via system() to pass command-line args just like from terminal
# Use full path and --vanilla to avoid renv/environment issues
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" %d "%s" "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
result <- system(cmd)
# NOTE(review): system() above and source()/main() below both execute Script 80
# (double run) -- likely an unresolved old/new diff pair; keep only one.
source("r_app/80_calculate_kpis.R")
main()  # Call main() to execute the script with the environment variables
# Checks only the system() exit status, not failures inside main().
if (result != 0) {
stop("Script 80 exited with error code:", result)
}
# Verify KPI output
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
@ -424,8 +519,6 @@ if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the cur
}
# Error handler for Script 80: log the failure and mark the whole pipeline as
# failed. `<<-` assigns to pipeline_success in the enclosing (script) scope so
# the downstream `if (pipeline_success)` guards are skipped.
}, error = function(e) {
cat("✗ Error in Script 80:", e$message, "\n")
# NOTE(review): the verbose print below appears to be the removed half of a
# diff (dropped in favour of the one-line message above).
cat("Full error:\n")
print(e)
pipeline_success <<- FALSE
})
}