diff --git a/CODE_REVIEW_FINDINGS.md b/CODE_REVIEW_FINDINGS.md new file mode 100644 index 0000000..54a9a88 --- /dev/null +++ b/CODE_REVIEW_FINDINGS.md @@ -0,0 +1,751 @@ +# SmartCane Pipeline Code Review +## Efficiency, Cleanup, and Architecture Analysis + +**Date**: January 29, 2026 +**Scope**: `run_full_pipeline.R` + all called scripts (10, 20, 21, 30, 31, 40, 80, 90, 91) + utility files +**Status**: Comprehensive review completed + +--- + +## EXECUTIVE SUMMARY + +Your pipeline is **well-structured and intentional**, but has accumulated significant technical debt through development iterations. The main issues are: + +1. **🔴 HIGH IMPACT**: **3 separate mosaic mode detection functions** doing identical work +2. **🔴 HIGH IMPACT**: **Week/year calculations duplicated 10+ times** across 6+ files +3. **🟡 MEDIUM IMPACT**: **40+ debug statements** cluttering output +4. **🟡 MEDIUM IMPACT**: **File existence checks repeated** in multiple places (especially KPI checks) +5. **🟢 LOW IMPACT**: Minor redundancy in command construction, but manageable + +**Estimated cleanup effort**: 2-3 hours for core refactoring; significant code quality gains. + +**Workflow clarity issue**: The split between `merged_tif` vs `merged_tif_8b` and `weekly_mosaic` vs `weekly_tile_max` is **not clearly documented**. This should be clarified. + +--- + +## 1. 
DUPLICATED FUNCTIONS & LOGIC + +### 1.1 Mosaic Mode Detection (CRITICAL REDUNDANCY) + +**Problem**: Three identical implementations of `detect_mosaic_mode()`: + +| Location | Function Name | Lines | Issue | +|----------|---------------|-------|-------| +| `run_full_pipeline.R` | `detect_mosaic_mode_early()` | ~20 lines | Detects tiled vs single-file | +| `run_full_pipeline.R` | `detect_mosaic_mode_simple()` | ~20 lines | Detects tiled vs single-file (duplicate) | +| `parameters_project.R` | `detect_mosaic_mode()` | ~30 lines | Detects tiled vs single-file (different signature) | + +**Impact**: If you change the detection logic, you must update 3 places. Bug risk is high. + +**Solution**: Create **single canonical function in `parameters_project.R`**: +```r +# SINGLE SOURCE OF TRUTH +detect_mosaic_mode <- function(project_dir) { + weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") + if (dir.exists(weekly_tile_max)) { + subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) + if (length(grep("^\\d+x\\d+$", subfolders)) > 0) return("tiled") + } + + weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(weekly_mosaic) && + length(list.files(weekly_mosaic, pattern = "^week_.*\\.tif$")) > 0) { + return("single-file") + } + + return("unknown") +} +``` + +Then replace all three calls in `run_full_pipeline.R` with this single function. + +--- + +### 1.2 Week/Year Calculations (CRITICAL REDUNDANCY) + +**Problem**: The pattern `week_num <- as.numeric(format(..., "%V"))` + `year_num <- as.numeric(format(..., "%G"))` appears **13+ times** across multiple files. 
+ +**Locations**: +- `run_full_pipeline.R`: Lines 82, 126-127, 229-230, 630, 793-794 (5 times) +- `80_calculate_kpis.R`: Lines 323-324 (1 time) +- `80_weekly_stats_utils.R`: Lines 829-830 (1 time) +- `kpi_utils.R`: Line 45 (1 time) +- `80_kpi_utils.R`: Lines 177-178 (1 time) +- Plus inline in sprintf statements: ~10+ additional times + +**Impact**: +- High maintenance burden +- Risk of inconsistency (%V vs %Y confusion noted at line 82 in `run_full_pipeline.R`) +- Code verbosity + +**Solution**: Create **utility function in `parameters_project.R`**: +```r +get_iso_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) # ISO year, not calendar year + ) +} + +# Usage: +wwy <- get_iso_week_year(end_date) +cat(sprintf("Week %02d/%d\n", wwy$week, wwy$year)) +``` + +**Also add convenience function**: +```r +format_week_year <- function(date, separator = "_") { + wwy <- get_iso_week_year(date) + sprintf("week_%02d%s%d", wwy$week, separator, wwy$year) +} + +# Usage: format_week_year(end_date) # "week_02_2026" +``` + +--- + +### 1.3 File Path Construction (MEDIUM REDUNDANCY) + +**Problem**: Repeated patterns like: +```r +file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") +file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) +``` + +**Solution**: Centralize in `parameters_project.R`: +```r +# Project-agnostic path builders +get_project_storage_path <- function(project_dir, subdir = NULL) { + base <- file.path("laravel_app", "storage", "app", project_dir) + if (!is.null(subdir)) file.path(base, subdir) else base +} + +get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { + if (mosaic_mode == "auto") mosaic_mode <- detect_mosaic_mode(project_dir) + if (mosaic_mode == "tiled") { + get_project_storage_path(project_dir, "weekly_tile_max/5x5") + } else { + get_project_storage_path(project_dir, "weekly_mosaic") + } +} + +get_kpi_dir <- 
function(project_dir, client_type) { + subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis" + get_project_storage_path(project_dir, file.path("reports", "kpis", subdir)) +} +``` + +--- + +## 2. DEBUG STATEMENTS & LOGGING CLUTTER + +### 2.1 Excessive Debug Output + +The pipeline prints **40+ debug statements** that pollute the terminal output. Examples: + +**In `run_full_pipeline.R`**: +```r +Line 82: cat(sprintf(" Running week: %02d / %d\n", ...)) # Note: %d (calendar year) should be %G +Line 218: cat(sprintf("[KPI_DIR_CREATED] Created directory: %s\n", ...)) +Line 223: cat(sprintf("[KPI_DIR_EXISTS] %s\n", ...)) +Line 224: cat(sprintf("[KPI_DEBUG] Total files in directory: %d\n", ...)) +Line 225: cat(sprintf("[KPI_DEBUG] Sample files: %s\n", ...)) +Line 240: cat(sprintf("[KPI_DEBUG_W%02d_%d] Pattern: '%s' | Found: %d files\n", ...)) +Line 630: cat("DEBUG: Running command:", cmd, "\n") +Line 630 in Script 31 execution - prints full conda command +``` + +**In `80_calculate_kpis.R`**: +``` +Line 323: message(paste("Calculating statistics for all fields - Week", week_num, year)) +Line 417: # Plus many more ... 
+``` + +**Impact**: +- Makes output hard to scan for real issues +- Test developers skip important messages +- Production logs become noise + +**Solution**: Replace with **structured logging** (3 levels): + +```r +# Add to parameters_project.R +smartcane_log <- function(message, level = "INFO") { + timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") + prefix <- sprintf("[%s] %s", level, timestamp) + cat(sprintf("%s | %s\n", prefix, message)) +} + +smartcane_debug <- function(message) { + if (Sys.getenv("SMARTCANE_DEBUG") == "TRUE") { + smartcane_log(message, level = "DEBUG") + } +} + +smartcane_warn <- function(message) { + smartcane_log(message, level = "WARN") +} +``` + +**Usage**: +```r +# Keep important messages +smartcane_log(sprintf("Downloaded %d dates, %d failed", download_count, download_failed)) + +# Hide debug clutter (only show if DEBUG=TRUE) +smartcane_debug(sprintf("KPI directory exists: %s", kpi_dir)) + +# Warnings stay visible +smartcane_warn("Some downloads failed, but continuing pipeline") +``` + +--- + +### 2.2 Redundant Status Checks in KPI Section + +**Lines 218-270 in `run_full_pipeline.R`**: The KPI requirement check has **deeply nested debug statements**. + +```r +if (dir.exists(kpi_dir)) { + cat(sprintf("[KPI_DIR_EXISTS] %s\n", kpi_dir)) + all_kpi_files <- list.files(kpi_dir) + cat(sprintf("[KPI_DEBUG] Total files in directory: %d\n", length(all_kpi_files))) + if (length(all_kpi_files) > 0) { + cat(sprintf("[KPI_DEBUG] Sample files: %s\n", ...)) + } +} else { + cat(sprintf("[KPI_DIR_MISSING] Directory does not exist: %s\n", kpi_dir)) +} +``` + +**Solution**: Simplify to: +```r +if (!dir.exists(kpi_dir)) { + dir.create(kpi_dir, recursive = TRUE, showWarnings = FALSE) +} + +all_kpi_files <- list.files(kpi_dir) +smartcane_debug(sprintf("KPI directory: %d files found", length(all_kpi_files))) +``` + +--- + +## 3. 
DOUBLE CALCULATIONS & INEFFICIENCIES + +### 3.1 KPI Existence Check (Calculated Twice) + +**Problem**: KPI existence is checked **twice** in `run_full_pipeline.R`: + +1. **First check (Lines 228-270)**: Initial KPI requirement check that calculates `kpis_needed` dataframe +2. **Second check (Lines 786-810)**: Verification after Script 80 runs (almost identical logic) + +Both loops do: +```r +for (weeks_back in 0:(reporting_weeks_needed - 1)) { + check_date <- end_date - (weeks_back * 7) + week_num <- as.numeric(format(check_date, "%V")) + year_num <- as.numeric(format(check_date, "%G")) + + week_pattern <- sprintf("week%02d_%d", week_num, year_num) + kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern) + + has_kpis <- length(kpi_files_this_week) > 0 + # ... same logic again +} +``` + +**Impact**: Slower pipeline execution, code duplication + +**Solution**: Create **reusable function in utility file**: +```r +check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks_needed) { + kpi_dir <- get_kpi_dir(project_dir, client_type) + + kpis_needed <- data.frame() + for (weeks_back in 0:(reporting_weeks_needed - 1)) { + check_date <- end_date - (weeks_back * 7) + wwy <- get_iso_week_year(check_date) + + week_pattern <- sprintf("week%02d_%d", wwy$week, wwy$year) + has_kpis <- any(grepl(week_pattern, list.files(kpi_dir))) + + kpis_needed <- rbind(kpis_needed, data.frame( + week = wwy$week, + year = wwy$year, + date = check_date, + has_kpis = has_kpis + )) + } + + return(list( + kpis_df = kpis_needed, + missing_count = sum(!kpis_needed$has_kpis), + all_complete = all(kpis_needed$has_kpis) + )) +} + +# Then in run_full_pipeline.R: +initial_kpi_check <- check_kpi_completeness(project_dir, client_type, end_date, reporting_weeks_needed) + +# ... 
after Script 80 runs: +final_kpi_check <- check_kpi_completeness(project_dir, client_type, end_date, reporting_weeks_needed) +if (final_kpi_check$all_complete) { + smartcane_log("✓ All KPIs available") +} +``` + +--- + +### 3.2 Mosaic Mode Detection (Called 3+ Times per Run) + +**Current code**: +- Line 99-117: `detect_mosaic_mode_early()` called once +- Line 301-324: `detect_mosaic_mode_simple()` called again +- Result: **Same detection logic runs twice unnecessarily** + +**Solution**: Call once, store result: +```r +mosaic_mode <- detect_mosaic_mode(project_dir) # Once at top + +# Then reuse throughout: +if (mosaic_mode == "tiled") { ... } +else if (mosaic_mode == "single-file") { ... } +``` + +--- + +### 3.3 Missing Weeks Calculation Inefficiency + +**Lines 126-170**: The loop builds `weeks_needed` dataframe, then **immediately** iterates again to find which ones are missing. + +**Current code**: +```r +# First: build all weeks +weeks_needed <- data.frame() +for (weeks_back in 0:(reporting_weeks_needed - 1)) { + # ... build weeks_needed +} + +# Then: check which are missing (loop again) +missing_weeks <- data.frame() +for (i in 1:nrow(weeks_needed)) { + # ... 
check each week +} +``` + +**Solution**: Combine into **single loop**: +```r +weeks_needed <- data.frame() +missing_weeks <- data.frame() +earliest_missing_date <- end_date + +for (weeks_back in 0:(reporting_weeks_needed - 1)) { + check_date <- end_date - (weeks_back * 7) + wwy <- get_iso_week_year(check_date) + + # Add to weeks_needed + weeks_needed <- rbind(weeks_needed, data.frame( + week = wwy$week, year = wwy$year, date = check_date + )) + + # Check if missing, add to missing_weeks if so + week_pattern <- sprintf("week_%02d_%d", wwy$week, wwy$year) + mosaic_dir <- get_mosaic_dir(project_dir, mosaic_mode) + + if (length(list.files(mosaic_dir, pattern = week_pattern)) == 0) { + missing_weeks <- rbind(missing_weeks, data.frame( + week = wwy$week, year = wwy$year, week_end_date = check_date + )) + if (check_date - 6 < earliest_missing_date) { + earliest_missing_date <- check_date - 6 + } + } +} +``` + +--- + +### 3.4 Data Source Detection Logic + +**Lines 58-84**: The `data_source_used` detection is overly complex: + +```r +data_source_used <- "merged_tif_8b" # Default +if (dir.exists(merged_tif_path)) { + tif_files <- list.files(merged_tif_path, pattern = "\\.tif$") + if (length(tif_files) > 0) { + data_source_used <- "merged_tif" + # ... + } else if (dir.exists(merged_tif_8b_path)) { + tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$") + # ... + } +} else if (dir.exists(merged_tif_8b_path)) { + # ... 
+} +``` + +**Issues**: +- Multiple nested conditions doing the same check +- `tif_files` and `tif_files_8b` are listed but only counts checked (not used later) +- Logic could be cleaner + +**Solution**: Create utility function: +```r +detect_data_source <- function(project_dir, preferred = "auto") { + storage_dir <- get_project_storage_path(project_dir) + + for (source in c("merged_tif", "merged_tif_8b")) { + source_dir <- file.path(storage_dir, source) + if (dir.exists(source_dir)) { + tifs <- list.files(source_dir, pattern = "\\.tif$") + if (length(tifs) > 0) return(source) + } + } + + smartcane_warn("No data source found - defaulting to merged_tif_8b") + return("merged_tif_8b") +} +``` + +--- + +## 4. WORKFLOW CLARITY ISSUES + +### 4.1 TIFF Data Format Confusion + +**Problem**: Why are there TWO different TIFF folders? + +- `merged_tif`: 4-band data (RGB + NIR) +- `merged_tif_8b`: 8-band data (appears to include UDM cloud masking from Planet) + +**Currently in code**: +```r +data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" +``` + +**Issues**: +- Hard-coded per project, not based on what's actually available +- Not documented **why** angata uses 8-band +- Unclear what the 8-band data adds (cloud masking? extra bands?) +- Scripts handle both, but it's not clear when to use which + +**Recommendation**: +1. **Document in `parameters_project.R`** what each data source contains: +```r +DATA_SOURCE_FORMATS <- list( + "merged_tif" = list( + bands = 4, + description = "4-band PlanetScope: Red, Green, Blue, NIR", + projects = c("aura", "chemba", "xinavane"), + note = "Standard format from Planet API" + ), + "merged_tif_8b" = list( + bands = 8, + description = "8-band PlanetScope with UDM: RGB+NIR + 4-band cloud mask", + projects = c("angata"), + note = "Enhanced with cloud confidence from UDM2 (Unusable Data Mask)" + ) +) +``` + +2. 
**Update hard-coded assignment** to be data-driven: +```r +# OLD: data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" +# NEW: detect what's actually available +data_source <- detect_data_source(project_dir) +``` + +--- + +### 4.2 Mosaic Storage Format Confusion + +**Problem**: Why are there TWO different mosaic storage styles? + +- `weekly_mosaic/`: Single TIF file per week (monolithic) +- `weekly_tile_max/5x5/`: Tiled TIFFs per week (25+ files per week) + +**Currently in code**: +- Detected automatically via `detect_mosaic_mode()` +- But **no documentation** on when/why each is used + +**Recommendation**: +1. **Document the trade-offs in `parameters_project.R`**: +```r +MOSAIC_MODES <- list( + "single-file" = list( + description = "One TIF per week", + storage_path = "weekly_mosaic/", + files_per_week = 1, + pros = c("Simpler file management", "Easier to load full mosaic"), + cons = c("Slower for field-specific analysis", "Large file I/O"), + suitable_for = c("agronomic_support", "dashboard visualization") + ), + "tiled" = list( + description = "5×5 grid of tiles per week", + storage_path = "weekly_tile_max/5x5/", + files_per_week = 25, + pros = c("Parallel field processing", "Faster per-field queries", "Scalable to 1000+ fields"), + cons = c("More file management", "Requires tile_grid metadata"), + suitable_for = c("cane_supply", "large-scale operations") + ) +) +``` + +2. **Document why angata uses tiled, aura uses single-file**: + - Is it a function of field count? (Angata = cane_supply, large fields → tiled) + - Is it historical? (Legacy decision?) + - Should new projects choose based on client type? 
+ +--- + +### 4.3 Client Type Mapping Clarity + +**Current structure** in `parameters_project.R`: + +```r +CLIENT_TYPE_MAP <- list( + "angata" = "cane_supply", + "aura" = "agronomic_support", + "chemba" = "cane_supply", + "xinavane" = "cane_supply", + "esa" = "cane_supply" +) +``` + +**Issues**: +- Not clear **why** aura is agronomic_support while angata/chemba are cane_supply +- No documentation of what each client type needs +- Scripts branch heavily on `skip_cane_supply_only` logic + +**Recommendation**: +Add metadata to explain the distinction: + +```r +CLIENT_TYPES <- list( + "cane_supply" = list( + description = "Sugar mill supply chain optimization", + requires_harvest_prediction = TRUE, # Script 31 + requires_phase_assignment = TRUE, # Based on planting date + per_field_detail = TRUE, # Script 91 Excel report + data_sources = c("merged_tif", "merged_tif_8b"), + mosaic_mode = "tiled", + projects = c("angata", "chemba", "xinavane", "esa") + ), + "agronomic_support" = list( + description = "Farm-level decision support for agronomists", + requires_harvest_prediction = FALSE, + requires_phase_assignment = FALSE, + per_field_detail = FALSE, + farm_level_kpis = TRUE, # Script 90 Word report + data_sources = c("merged_tif"), + mosaic_mode = "single-file", + projects = c("aura") + ) +) +``` + +--- + +## 5. COMMAND CONSTRUCTION REDUNDANCY + +### 5.1 Rscript Path Repetition + +**Problem**: The Rscript path is repeated 5 times: + +```r +Line 519: '"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"' +Line 676: '"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"' +Line 685: '"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"' +``` + +**Solution**: Define once in `parameters_project.R`: +```r +RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" + +# Usage: +cmd <- sprintf('"%s" --vanilla r_app/20_ci_extraction.R ...', RSCRIPT_PATH) +``` + +--- + +## 6. 
SPECIFIC LINE-BY-LINE ISSUES + +### 6.1 Line 82 Bug: Wrong Format Code + +```r +cat(sprintf(" Running week: %02d / %d\n", + as.numeric(format(end_date, "%V")), + as.numeric(format(end_date, "%Y")))) # ❌ Should be %G, not %Y +``` + +**Issue**: Uses calendar year `%Y` instead of ISO week year `%G`. On dates like 2025-12-30 (week 1 of 2026), this will print "Week 01 / 2025" (confusing). + +**Fix**: +```r +wwy <- get_iso_week_year(end_date) +cat(sprintf(" Running week: %02d / %d\n", wwy$week, wwy$year)) +``` + +--- + +### 6.2 Line 630 Debug Statement + +```r +cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir) +cat("DEBUG: Running command:", cmd, "\n") # ❌ Prints full conda command +``` + +**Solution**: Use `smartcane_debug()` function: +```r +cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir) +smartcane_debug(sprintf("Running Python 31: %s", cmd)) +``` + +--- + +### 6.3 Lines 719-723: Verbose Script 31 Verification + +```r +# Check for THIS WEEK's specific file +current_week <- as.numeric(format(end_date, "%V")) +current_year <- as.numeric(format(end_date, "%Y")) +expected_file <- file.path(...) +``` + +**Issue**: Calculates week twice (already done earlier). Also uses `%Y` (should be `%G`). + +**Solution**: Reuse earlier `wwy` calculation or create helper. + +--- + +## 7. 
REFACTORING ROADMAP + +### Phase 1: Foundation (1 hour) +- [ ] Consolidate `detect_mosaic_mode()` into single function in `parameters_project.R` +- [ ] Create `get_iso_week_year()` and `format_week_year()` utilities +- [ ] Create `get_project_storage_path()`, `get_mosaic_dir()`, `get_kpi_dir()` helpers +- [ ] Add logging functions (`smartcane_log()`, `smartcane_debug()`, `smartcane_warn()`) + +### Phase 2: Deduplication (1 hour) +- [ ] Replace all 13+ week_num/year_num calculations with `get_iso_week_year()` +- [ ] Replace all 3 `detect_mosaic_mode_*()` calls with single function +- [ ] Combine duplicate KPI checks into `check_kpi_completeness()` function +- [ ] Fix line 82 and 630 format bugs + +### Phase 3: Cleanup (1 hour) +- [ ] Remove all debug statements (40+), replace with `smartcane_debug()` +- [ ] Simplify nested conditions in data_source detection +- [ ] Combine missing weeks detection into single loop +- [ ] Extract Rscript path to constant + +### Phase 4: Documentation (30 min) +- [ ] Add comments explaining `merged_tif` vs `merged_tif_8b` trade-offs +- [ ] Document `single-file` vs `tiled` mosaic modes and when to use each +- [ ] Clarify client type mapping in `CLIENT_TYPE_MAP` +- [ ] Add inline comments for non-obvious logic + +--- + +## 8. 
ARCHITECTURE & WORKFLOW RECOMMENDATIONS
+
+### 8.1 Clear Data Flow Diagram
+
+Add to `r_app/system_architecture/system_architecture.md`:
+
+```
+INPUT SOURCES:
+  ├── Planet API 4-band or 8-band imagery
+  ├── Field boundaries (pivot.geojson)
+  └── Harvest data (harvest.xlsx, optional for cane_supply)
+
+STORAGE TIERS:
+  ├── Tier 1: Raw data (merged_tif/ or merged_tif_8b/)
+  ├── Tier 2: Daily tiles (daily_tiles_split/{grid_size}/{dates}/)
+  ├── Tier 3: Extracted CI (Data/extracted_ci/daily_vals/*.rds)
+  ├── Tier 4: Weekly mosaics (weekly_mosaic/ OR weekly_tile_max/5x5/)
+  └── Tier 5: KPI outputs (reports/kpis/{field_level|field_analysis}/)
+
+DECISION POINTS:
+  └─ Client type (cane_supply vs agronomic_support)
+     ├─ Drives script selection (Scripts 21, 22, 23, 31, 90/91)
+     ├─ Drives data source (merged_tif_8b for cane_supply, merged_tif for agronomic)
+     ├─ Drives mosaic mode (tiled for cane_supply, single-file for agronomic)
+     └─ Drives KPI subdirectory (field_analysis vs field_level)
+```
+
+### 8.2 .sh Scripts Alignment
+
+You mention `.sh` scripts in the online environment. If they're **not calling the R pipeline**, there's a **split responsibility** issue:
+
+**Question**: Are the `.sh` scripts:
+- (A) Independent duplicates of the R pipeline logic? (BAD - maintenance nightmare)
+- (B) Wrappers calling the R pipeline? (GOOD - single source of truth)
+- (C) Different workflow for online vs local? (RED FLAG - they diverge)
+
+**Recommendation**: If using `.sh` for production, ensure they **call the same R scripts** (`run_full_pipeline.R`). Example:
+
+```bash
+#!/bin/bash
+# Wrapper that ensures R pipeline is called
+cd /path/to/smartcane
+Rscript r_app/run_full_pipeline.R  # Rscript must be on PATH; the PowerShell `&` call operator and a Windows path do not work in bash
+```
+
+---
+
+## 9. 
SUMMARY TABLE: Issues by Severity + +| Issue | Type | Impact | Effort | Priority | +|-------|------|--------|--------|----------| +| 3 mosaic detection functions | Duplication | HIGH | 30 min | P0 | +| 13+ week/year calculations | Duplication | HIGH | 1 hour | P0 | +| 40+ debug statements | Clutter | MEDIUM | 1 hour | P1 | +| KPI check run twice | Inefficiency | LOW | 30 min | P2 | +| Line 82: %Y should be %G | Bug | LOW | 5 min | P2 | +| Data source confusion | Documentation | MEDIUM | 30 min | P1 | +| Mosaic mode confusion | Documentation | MEDIUM | 30 min | P1 | +| Client type mapping | Documentation | MEDIUM | 30 min | P1 | +| Data source detection complexity | Code style | LOW | 15 min | P3 | + +--- + +## 10. RECOMMENDED NEXT STEPS + +1. **Review this report** with your team to align on priorities +2. **Create Linear issues** for each phase of refactoring +3. **Start with Phase 1** (foundation utilities) - builds confidence for Phase 2 +4. **Test thoroughly** after each phase - the pipeline is complex and easy to break +5. **Update `.sh` scripts** if they duplicate R logic +6. **Document data flow** in `system_architecture/system_architecture.md` + +--- + +## Questions for Clarification + +Before implementing, please clarify: + +1. **Data source split**: Why does angata use `merged_tif_8b` (8-band with cloud mask) while aura uses `merged_tif` (4-band)? Is this: + - A function of client need (cane_supply requires cloud masking)? + - Historical (legacy decision for angata)? + - Should new projects choose based on availability? + +2. **Mosaic mode split**: Why tiled for angata but single-file for aura? Should this be: + - Hard-coded per project? + - Based on field count/client type? + - Auto-detected from first run? + +3. **Production vs local**: Are the `.sh` scripts in the online environment: + - Calling this same R pipeline? + - Duplicating logic independently? + - A different workflow entirely? + +4. 
**Client type growth**: Are there other client types planned beyond `cane_supply` and `agronomic_support`? (e.g., extension_service?) + +--- + +**Report prepared**: January 29, 2026 +**Total code reviewed**: ~2,500 lines across 10 files +**Estimated refactoring time**: 3-4 hours +**Estimated maintenance savings**: 5-10 hours/month (fewer bugs, easier updates) + diff --git a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R index cc0945c..a89fab8 100644 --- a/r_app/40_mosaic_creation.R +++ b/r_app/40_mosaic_creation.R @@ -188,7 +188,7 @@ main <- function() { if (!exists("use_tile_mosaic")) { # Fallback detection if flag not set (shouldn't happen) merged_final_dir <- file.path(laravel_storage, "merged_final_tif") - tile_detection <- detect_mosaic_mode(merged_final_dir) + tile_detection <- detect_tile_structure_from_merged_final(merged_final_dir) use_tile_mosaic <- tile_detection$has_tiles } diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R index 3aba594..2852dc0 100644 --- a/r_app/40_mosaic_creation_utils.R +++ b/r_app/40_mosaic_creation_utils.R @@ -3,12 +3,12 @@ # Utility functions for creating weekly mosaics from daily satellite imagery. # These functions support cloud cover assessment, date handling, and mosaic creation. 
-#' Detect whether a project uses tile-based or single-file mosaic approach +#' Detect whether a project uses tile-based or single-file mosaic approach (utility version) #' #' @param merged_final_tif_dir Directory containing merged_final_tif files #' @return List with has_tiles (logical), detected_tiles (vector), total_files (count) #' -detect_mosaic_mode <- function(merged_final_tif_dir) { +detect_tile_structure_from_files <- function(merged_final_tif_dir) { # Check if directory exists if (!dir.exists(merged_final_tif_dir)) { return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 07a5565..a0eacb8 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -114,7 +114,7 @@ get_client_kpi_config <- function(client_type) { # 3. Smart detection for tile-based vs single-file mosaic approach # ---------------------------------------------------------------- -detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { +detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { # PRIORITY 1: Check for tiling_config.json metadata file from script 10 # This is the most reliable source since script 10 explicitly records its decision @@ -223,7 +223,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b" merged_final_dir <- here(laravel_storage_dir, "merged_final_tif") daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split") - tile_detection <- detect_mosaic_mode( + tile_detection <- detect_tile_structure_from_merged_final( merged_final_tif_dir = merged_final_dir, daily_tiles_split_dir = daily_tiles_split_dir ) @@ -498,6 +498,279 @@ setup_logging <- function(log_dir) { )) } +# 8. 
HELPER FUNCTIONS FOR COMMON CALCULATIONS +# ----------------------------------------------- +# Centralized functions to reduce duplication across scripts + +# Get ISO week and year from a date +get_iso_week <- function(date) { + as.numeric(format(date, "%V")) +} + +get_iso_year <- function(date) { + as.numeric(format(date, "%G")) +} + +# Get both ISO week and year as a list +get_iso_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) + ) +} + +# Format week/year into a readable label +format_week_label <- function(date, separator = "_") { + wwy <- get_iso_week_year(date) + sprintf("week%02d%s%d", wwy$week, separator, wwy$year) +} + +# Auto-detect mosaic mode (tiled vs single-file) +# Returns: "tiled", "single-file", or "unknown" +detect_mosaic_mode <- function(project_dir) { + # Check for tile-based approach: weekly_tile_max/{grid_size}/week_*.tif + weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") + if (dir.exists(weekly_tile_max)) { + subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + if (length(grid_patterns) > 0) { + return("tiled") + } + } + + # Check for single-file approach: weekly_mosaic/week_*.tif + weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(weekly_mosaic)) { + files <- list.files(weekly_mosaic, pattern = "^week_.*\\.tif$") + if (length(files) > 0) { + return("single-file") + } + } + + return("unknown") +} + +# Auto-detect grid size from tile directory structure +# Returns: e.g., "5x5", "10x10", or "unknown" +detect_grid_size <- function(project_dir) { + weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") + if (dir.exists(weekly_tile_max)) { + subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) + grid_patterns <- 
grep("^\\d+x\\d+$", subfolders, value = TRUE) + if (length(grid_patterns) > 0) { + return(grid_patterns[1]) # Return first match (usually only one) + } + } + return("unknown") +} + +# Build storage paths consistently across all scripts +get_project_storage_path <- function(project_dir, subdir = NULL) { + base <- file.path("laravel_app", "storage", "app", project_dir) + if (!is.null(subdir)) file.path(base, subdir) else base +} + +get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { + if (mosaic_mode == "auto") { + mosaic_mode <- detect_mosaic_mode(project_dir) + } + + if (mosaic_mode == "tiled") { + grid_size <- detect_grid_size(project_dir) + if (grid_size != "unknown") { + get_project_storage_path(project_dir, file.path("weekly_tile_max", grid_size)) + } else { + get_project_storage_path(project_dir, "weekly_tile_max/5x5") # Fallback default + } + } else { + get_project_storage_path(project_dir, "weekly_mosaic") + } +} + +get_kpi_dir <- function(project_dir, client_type) { + subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis" + get_project_storage_path(project_dir, file.path("reports", "kpis", subdir)) +} + +# Logging functions for clean output +smartcane_log <- function(message, level = "INFO", verbose = TRUE) { + if (!verbose) return(invisible(NULL)) + timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") + prefix <- sprintf("[%s]", level) + cat(sprintf("%s %s\n", prefix, message)) +} + +smartcane_debug <- function(message, verbose = FALSE) { + if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") { + return(invisible(NULL)) + } + smartcane_log(message, level = "DEBUG", verbose = TRUE) +} + +smartcane_warn <- function(message) { + smartcane_log(message, level = "WARN", verbose = TRUE) +} + +# ============================================================================ +# PHASE 3 & 4: OPTIMIZATION & DOCUMENTATION +# ============================================================================ + +# System Constants +# 
---------------- +# Define once, use everywhere + +RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" +# Used in run_full_pipeline.R for calling R scripts via system() + +# Data Source Documentation +# --------------------------- +# Explains the two satellite data formats and when to use each +# +# SmartCane uses PlanetScope imagery from Planet Labs API in two formats: +# +# 1. merged_tif (4-band): +# - Standard format: Red, Green, Blue, Near-Infrared +# - Size: ~150-200 MB per date +# - Use case: Agronomic support, general crop health monitoring +# - Projects: aura, xinavane +# - Cloud handling: Basic cloud masking from Planet metadata +# +# 2. merged_tif_8b (8-band with cloud confidence): +# - Enhanced format: 4-band imagery + 4-band UDM2 cloud mask +# - UDM2 bands: Clear, Snow, Shadow, Light Haze +# - Size: ~250-350 MB per date +# - Use case: Harvest prediction, supply chain optimization +# - Projects: angata, chemba, esa (cane_supply clients) +# - Cloud handling: Per-pixel cloud confidence from Planet UDM2 +# - Why: Cane supply chains need precise confidence to predict harvest dates +# (don't want to predict based on cloudy data) +# +# The system auto-detects which is available via detect_data_source() + +# Mosaic Mode Documentation +# -------------------------- +# SmartCane supports two ways to store and process weekly mosaics: +# +# 1. Single-file mosaic ("single-file"): +# - One GeoTIFF per week: weekly_mosaic/week_02_2026.tif +# - 5 bands per file: R, G, B, NIR, CI (Canopy Index) +# - Size: ~300-500 MB per week +# - Pros: Simpler file management, easier full-field visualization +# - Cons: Slower for field-specific queries, requires loading full raster +# - Best for: Agronomic support (aura) with <100 fields +# - Script 04 output: 5-band single-file mosaic +# +# 2. 
Tiled mosaic ("tiled"): +# - Grid of tiles per week: weekly_tile_max/5x5/week_02_2026_{TT}.tif +# - Example: 25 files (5×5 grid) × 5 bands = 125 individual tiffs +# - Size: ~15-20 MB per tile, organized in folders +# - Pros: Parallel processing, fast field lookups, scales to 1000+ fields +# - Cons: More file I/O, requires tile-to-field mapping metadata +# - Best for: Cane supply (angata, chemba) with 500+ fields +# - Script 04 output: Per-tile tiff files in weekly_tile_max/{grid}/ +# - Tile assignment: Field boundaries mapped to grid coordinates +# +# The system auto-detects which is available via detect_mosaic_mode() + +# Client Type Documentation +# -------------------------- +# SmartCane runs different analysis pipelines based on client_type: +# +# CLIENT_TYPE: cane_supply +# Purpose: Optimize sugar mill supply chain (harvest scheduling) +# Scripts run: 20 (CI), 21 (RDS to CSV), 30 (Growth), 31 (Harvest pred), 40 (Mosaic), 80 (KPI), 91 (Excel) +# Outputs: +# - Per-field analysis: field status, growth phase, harvest readiness +# - Excel reports (Script 91): Detailed metrics for logistics planning +# - KPI directory: reports/kpis/field_analysis/ (one RDS per week) +# Harvest data: Required (harvest.xlsx - planting dates for phase assignment) +# Data source: merged_tif_8b (uses cloud confidence for confidence) +# Mosaic mode: tiled (scales to 500+ fields) +# Projects: angata, chemba, xinavane, esa +# +# CLIENT_TYPE: agronomic_support +# Purpose: Provide weekly crop health insights to agronomists +# Scripts run: 80 (KPI), 90 (Word report) +# Outputs: +# - Farm-level KPI summaries (no per-field breakdown) +# - Word reports (Script 90): Charts and trends for agronomist decision support +# - KPI directory: reports/kpis/field_level/ (one RDS per week) +# Harvest data: Not used +# Data source: merged_tif (simpler, smaller) +# Mosaic mode: single-file (100-200 fields) +# Projects: aura +# + +# Detect data source (merged_tif vs merged_tif_8b) based on availability +# 
Returns the first available source; defaults to merged_tif_8b if neither exists +detect_data_source <- function(project_dir) { + storage_dir <- get_project_storage_path(project_dir) + + # Preferred order: check merged_tif first, fall back to merged_tif_8b + for (source in c("merged_tif", "merged_tif_8b")) { + source_dir <- file.path(storage_dir, source) + if (dir.exists(source_dir)) { + tifs <- list.files(source_dir, pattern = "\\.tif$") + if (length(tifs) > 0) { + smartcane_log(sprintf("Detected data source: %s (%d TIF files)", source, length(tifs))) + return(source) + } + } + } + + smartcane_warn(sprintf("No data source found for %s - defaulting to merged_tif_8b", project_dir)) + return("merged_tif_8b") +} + +# Check KPI completeness for a reporting period +# Returns: List with kpis_df (data.frame), missing_count, and all_complete (boolean) +# This replaces duplicate KPI checking logic in run_full_pipeline.R (lines ~228-270, ~786-810) +check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks_needed) { + kpi_dir <- get_kpi_dir(project_dir, client_type) + + kpis_needed <- data.frame() + + for (weeks_back in 0:(reporting_weeks_needed - 1)) { + check_date <- end_date - (weeks_back * 7) + wwy <- get_iso_week_year(check_date) + + # Build week pattern and check if it exists + week_pattern <- sprintf("week%02d_%d", wwy$week, wwy$year) + files_this_week <- list.files(kpi_dir, pattern = week_pattern) + has_kpis <- length(files_this_week) > 0 + + # Track missing weeks + kpis_needed <- rbind(kpis_needed, data.frame( + week = wwy$week, + year = wwy$year, + date = check_date, + has_kpis = has_kpis, + pattern = week_pattern, + file_count = length(files_this_week) + )) + + # Debug logging + smartcane_debug(sprintf( + "Week %02d/%d (%s): %s (%d files)", + wwy$week, wwy$year, format(check_date, "%Y-%m-%d"), + if (has_kpis) "✓ FOUND" else "✗ MISSING", + length(files_this_week) + )) + } + + # Summary statistics + missing_count <- 
sum(!kpis_needed$has_kpis) + all_complete <- missing_count == 0 + + return(list( + kpis_df = kpis_needed, + kpi_dir = kpi_dir, + missing_count = missing_count, + missing_weeks = kpis_needed[!kpis_needed$has_kpis, ], + all_complete = all_complete + )) +} + # 9. Initialize the project # ---------------------- # Export project directories and settings diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index 41de090..30f1819 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -10,30 +10,30 @@ # 6. Python 31: Harvest imminent weekly # 7. R 40: Mosaic creation # 8. R 80: Calculate KPIs -# +# # ============================================================================== # HOW TO RUN THIS SCRIPT # ============================================================================== -# +# # Run from the smartcane/ directory: -# +# # Option 1 (Recommended - shows real-time output): # Rscript r_app/run_full_pipeline.R -# +# # Option 2 (Full path to Rscript - use & in PowerShell for paths with spaces): # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R -# +# # Option 3 (Batch mode - output saved to .Rout file): # R CMD BATCH --vanilla r_app/run_full_pipeline.R -# +# # ============================================================================== # ============================================================================== # *** EDIT THESE VARIABLES *** -end_date <- as.Date("2026-01-07") # or specify: as.Date("2026-01-27") , Sys.Date() -project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba" +end_date <- as.Date("2026-01-07") # or specify: as.Date("2026-01-27") , Sys.Date() +project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba" data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" -force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist +force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist # 
*************************** # Load client type mapping from parameters_project.R @@ -45,43 +45,26 @@ cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type)) # DETECT WHICH DATA SOURCE IS AVAILABLE (merged_tif vs merged_tif_8b) # ============================================================================== # Check which merged_tif folder actually has files for this project -laravel_storage_dir <- file.path("laravel_app", "storage", "app", project_dir) -merged_tif_path <- file.path(laravel_storage_dir, "merged_tif") -merged_tif_8b_path <- file.path(laravel_storage_dir, "merged_tif_8b") - -data_source_used <- "merged_tif_8b" # Default -if (dir.exists(merged_tif_path)) { - tif_files <- list.files(merged_tif_path, pattern = "\\.tif$") - if (length(tif_files) > 0) { - data_source_used <- "merged_tif" - cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files))) - } else if (dir.exists(merged_tif_8b_path)) { - tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$") - if (length(tif_files_8b) > 0) { - data_source_used <- "merged_tif_8b" - cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files_8b))) - } - } -} else if (dir.exists(merged_tif_8b_path)) { - tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$") - if (length(tif_files_8b) > 0) { - data_source_used <- "merged_tif_8b" - cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files_8b))) - } -} +# Uses centralized detection function from parameters_project.R +# NOTE: Old code below commented out - now handled by detect_data_source() +# laravel_storage_dir <- file.path("laravel_app", "storage", "app", project_dir) +# merged_tif_path <- file.path(laravel_storage_dir, "merged_tif") +data_source_used <- detect_data_source(project_dir) # ============================================================================== # DETERMINE REPORTING WINDOW 
(auto-calculated based on KPI requirements) # ============================================================================== # Script 80 (KPIs) needs N weeks of historical data for trend analysis and reporting # We calculate this automatically based on client type -reporting_weeks_needed <- 4 # Default: KPIs need current week + 3 weeks history for trends -offset <- (reporting_weeks_needed - 1) * 7 # Convert weeks to days +reporting_weeks_needed <- 4 # Default: KPIs need current week + 3 weeks history for trends +offset <- (reporting_weeks_needed - 1) * 7 # Convert weeks to days cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset)) -cat(sprintf(" Running week: %02d / %d\n", as.numeric(format(end_date, "%V")), as.numeric(format(end_date, "%Y")))) +wwy_current <- get_iso_week_year(end_date) +cat(sprintf(" Running week: %02d / %d\n", wwy_current$week, wwy_current$year)) cat(sprintf(" Date range: %s to %s\n", format(end_date - offset, "%Y-%m-%d"), format(end_date, "%Y-%m-%d"))) + # Format dates end_date_str <- format(as.Date(end_date), "%Y-%m-%d") @@ -95,56 +78,34 @@ pipeline_success <- TRUE # Run this BEFORE downloads so we can download ONLY missing dates upfront cat("\n========== EARLY CHECK: MOSAIC REQUIREMENTS FOR REPORTING WINDOW ==========\n") -# Detect mosaic mode early (before full checking section) -detect_mosaic_mode_early <- function(project_dir) { - weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") - if (dir.exists(weekly_tile_max)) { - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - return("tiled") - } - } - - weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") - if (dir.exists(weekly_mosaic)) { - files <- list.files(weekly_mosaic, pattern = "^week_.*\\.tif$") - if (length(files) > 0) { - 
return("single-file") - } - } - - return("unknown") -} - -mosaic_mode <- detect_mosaic_mode_early(project_dir) +# Detect mosaic mode early (centralized function in parameters_project.R) +mosaic_mode <- detect_mosaic_mode(project_dir) # Check what mosaics we NEED weeks_needed <- data.frame() for (weeks_back in 0:(reporting_weeks_needed - 1)) { check_date <- end_date - (weeks_back * 7) - week_num <- as.numeric(format(check_date, "%V")) - year_num <- as.numeric(format(check_date, "%G")) # %G = ISO week year (not calendar year %Y) - weeks_needed <- rbind(weeks_needed, data.frame(week = week_num, year = year_num, date = check_date)) + wwy <- get_iso_week_year(check_date) + weeks_needed <- rbind(weeks_needed, data.frame(week = wwy$week, year = wwy$year, date = check_date)) } -missing_weeks_dates <- c() # Will store the earliest date of missing weeks -earliest_missing_date <- end_date # Start with end_date, go back if needed -missing_weeks <- data.frame() # Track ALL missing weeks for later processing by Script 40 +missing_weeks_dates <- c() # Will store the earliest date of missing weeks +earliest_missing_date <- end_date # Start with end_date, go back if needed +missing_weeks <- data.frame() # Track ALL missing weeks for later processing by Script 40 for (i in 1:nrow(weeks_needed)) { week_num <- weeks_needed[i, "week"] year_num <- weeks_needed[i, "year"] check_date <- weeks_needed[i, "date"] - + # Pattern must be flexible to match both: # - Single-file: week_51_2025.tif # - Tiled: week_51_2025_01.tif, week_51_2025_02.tif, etc. 
week_pattern_check <- sprintf("week_%02d_%d", week_num, year_num) files_this_week <- c() - + if (mosaic_mode == "tiled") { - mosaic_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") + mosaic_dir_check <- get_mosaic_dir(project_dir, mosaic_mode = "tiled") if (dir.exists(mosaic_dir_check)) { files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check) } @@ -154,13 +115,15 @@ for (i in 1:nrow(weeks_needed)) { files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check) } } - - cat(sprintf(" Week %02d/%d (%s): %s\n", week_num, year_num, format(check_date, "%Y-%m-%d"), - if(length(files_this_week) > 0) "✓ EXISTS" else "✗ MISSING")) - + + cat(sprintf( + " Week %02d/%d (%s): %s\n", week_num, year_num, format(check_date, "%Y-%m-%d"), + if (length(files_this_week) > 0) "✓ EXISTS" else "✗ MISSING" + )) + # If week is missing, track its date range for downloading/processing if (length(files_this_week) == 0) { - week_start <- check_date - 6 # Monday of that week + week_start <- check_date - 6 # Monday of that week if (week_start < earliest_missing_date) { earliest_missing_date <- week_start } @@ -172,19 +135,21 @@ for (i in 1:nrow(weeks_needed)) { # Calculate dynamic offset for preprocessing: only process from earliest missing week to end_date if (earliest_missing_date < end_date) { cat(sprintf("\n[INFO] Missing week(s) detected - need to fill from %s onwards\n", format(earliest_missing_date, "%Y-%m-%d"))) - + # Adjust offset to cover only the gap (from earliest missing week to end_date) dynamic_offset <- as.numeric(end_date - earliest_missing_date) - cat(sprintf("[INFO] Will download/process ONLY missing dates: %d days (from %s to %s)\n", - dynamic_offset, format(earliest_missing_date, "%Y-%m-%d"), format(end_date, "%Y-%m-%d"))) - + cat(sprintf( + "[INFO] Will download/process ONLY missing dates: %d days (from %s to %s)\n", + dynamic_offset, format(earliest_missing_date, "%Y-%m-%d"), 
format(end_date, "%Y-%m-%d") + )) + # Use dynamic offset for data generation scripts (10, 20, 30, 40) # But Script 80 still uses full reporting_weeks_needed offset for KPI calculations data_generation_offset <- dynamic_offset force_data_generation <- TRUE } else { cat("\n[INFO] ✓ All required mosaics exist - using normal reporting window\n") - data_generation_offset <- offset # Use default reporting window offset + data_generation_offset <- offset # Use default reporting window offset force_data_generation <- FALSE } @@ -193,86 +158,45 @@ if (earliest_missing_date < end_date) { # ============================================================================== # Scripts 90 (Word report) and 91 (Excel report) require KPIs for full reporting window # Script 80 ALWAYS runs and will CALCULATE missing KPIs, so this is just for visibility +# Uses centralized check_kpi_completeness() function from parameters_project.R cat("\n========== KPI REQUIREMENT CHECK ==========\n") -cat(sprintf("KPIs needed for reporting: %d weeks (current week + %d weeks history)\n", - reporting_weeks_needed, reporting_weeks_needed - 1)) +cat(sprintf( + "KPIs needed for reporting: %d weeks (current week + %d weeks history)\n", + reporting_weeks_needed, reporting_weeks_needed - 1 +)) -# Determine KPI directory based on client type -# - agronomic_support: field_level/ (6 farm-level KPIs) -# - cane_supply: field_analysis/ (per-field analysis) -kpi_subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis" -kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) +# Check KPI completeness (replaces duplicate logic from lines ~228-270 and ~786-810) +kpi_check <- check_kpi_completeness(project_dir, client_type, end_date, reporting_weeks_needed) +kpi_dir <- kpi_check$kpi_dir +kpis_needed <- kpi_check$kpis_df +kpis_missing_count <- kpi_check$missing_count # Create KPI directory if it doesn't exist if (!dir.exists(kpi_dir)) { 
dir.create(kpi_dir, recursive = TRUE, showWarnings = FALSE) - cat(sprintf("[KPI_DIR_CREATED] Created directory: %s\n", kpi_dir)) } -kpis_needed <- data.frame() -kpis_missing_count <- 0 - -# Debug: Check if KPI directory exists -if (dir.exists(kpi_dir)) { - cat(sprintf("[KPI_DIR_EXISTS] %s\n", kpi_dir)) - all_kpi_files <- list.files(kpi_dir) - cat(sprintf("[KPI_DEBUG] Total files in directory: %d\n", length(all_kpi_files))) - if (length(all_kpi_files) > 0) { - cat(sprintf("[KPI_DEBUG] Sample files: %s\n", paste(head(all_kpi_files, 3), collapse = ", "))) - } -} else { - cat(sprintf("[KPI_DIR_MISSING] Directory does not exist: %s\n", kpi_dir)) -} - -for (weeks_back in 0:(reporting_weeks_needed - 1)) { - check_date <- end_date - (weeks_back * 7) - week_num <- as.numeric(format(check_date, "%V")) - year_num <- as.numeric(format(check_date, "%G")) - - # Check for any KPI file from that week - use more flexible pattern matching - week_pattern <- sprintf("week%02d_%d", week_num, year_num) - kpi_files_this_week <- c() - if (dir.exists(kpi_dir)) { - # List all files and manually check for pattern match - all_files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") - kpi_files_this_week <- all_files[grepl(week_pattern, all_files, fixed = TRUE)] - - # Debug output for first week - if (weeks_back == 0) { - cat(sprintf("[KPI_DEBUG_W%02d_%d] Pattern: '%s' | Found: %d files\n", - week_num, year_num, week_pattern, length(kpi_files_this_week))) - if (length(kpi_files_this_week) > 0) { - cat(sprintf("[KPI_DEBUG_W%02d_%d] Files: %s\n", - week_num, year_num, paste(kpi_files_this_week, collapse = ", "))) - } - } - } - - has_kpis <- length(kpi_files_this_week) > 0 - kpis_needed <- rbind(kpis_needed, data.frame( - week = week_num, - year = year_num, - date = check_date, - has_kpis = has_kpis +# Display status for each week +for (i in 1:nrow(kpis_needed)) { + row <- kpis_needed[i, ] + cat(sprintf( + " Week %02d/%d (%s): %s (%d files)\n", + row$week, row$year, format(row$date, 
"%Y-%m-%d"), + if (row$has_kpis) "✓ EXISTS" else "✗ WILL BE CALCULATED", + row$file_count )) - - if (!has_kpis) { - kpis_missing_count <- kpis_missing_count + 1 - } - - cat(sprintf(" Week %02d/%d (%s): %s\n", - week_num, year_num, format(check_date, "%Y-%m-%d"), - if(has_kpis) "✓ EXISTS" else "✗ WILL BE CALCULATED")) } -cat(sprintf("\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n", - nrow(kpis_needed) - kpis_missing_count, nrow(kpis_needed), kpis_missing_count)) +cat(sprintf( + "\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n", + nrow(kpis_needed) - kpis_missing_count, nrow(kpis_needed), kpis_missing_count +)) # Define conditional script execution based on client type # Client types: # - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output) # - "agronomic_support": Runs Scripts 20,30,80,90 only (KPI calculation + Word report) -# +# # Scripts that ALWAYS run (regardless of client type): # - 00: Python Download # - 10: Tiling (if outputs don't exist) @@ -280,7 +204,7 @@ cat(sprintf("\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by # - 30: Growth Model # - 40: Mosaic Creation # - 80: KPI Calculation -# +# # Scripts that are client-type specific: # - 21: CI RDS→CSV (cane_supply only) # - 22: (cane_supply only) @@ -288,40 +212,16 @@ cat(sprintf("\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by # - 31: Harvest Imminent (cane_supply only) # - 90: Legacy Word Report (agronomic_support only) # - 91: Modern Excel Report (cane_supply only) -skip_cane_supply_only <- (client_type != "cane_supply") # Skip Scripts 21,22,23,31 for non-cane_supply -run_legacy_report <- (client_type == "agronomic_support") # Script 90 for agronomic support -run_modern_report <- (client_type == "cane_supply") # Script 91 for cane supply +skip_cane_supply_only <- (client_type != "cane_supply") # Skip Scripts 21,22,23,31 for non-cane_supply +run_legacy_report <- (client_type 
== "agronomic_support") # Script 90 for agronomic support +run_modern_report <- (client_type == "cane_supply") # Script 91 for cane supply # ============================================================================== # INTELLIGENT CHECKING: What has already been completed? # ============================================================================== cat("\n========== CHECKING EXISTING OUTPUTS ==========\n") -# Detect mosaic mode (tile-based vs single-file) automatically -detect_mosaic_mode_simple <- function(project_dir) { - # Check for tile-based approach: weekly_tile_max/{grid_size}/week_*.tif - weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") - if (dir.exists(weekly_tile_max)) { - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - return("tiled") - } - } - - # Check for single-file approach: weekly_mosaic/week_*.tif - weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") - if (dir.exists(weekly_mosaic)) { - files <- list.files(weekly_mosaic, pattern = "^week_.*\\.tif$") - if (length(files) > 0) { - return("single-file") - } - } - - return("unknown") -} - -mosaic_mode <- detect_mosaic_mode_simple(project_dir) +# Use centralized mosaic mode detection from parameters_project.R cat(sprintf("Auto-detected mosaic mode: %s\n", mosaic_mode)) # Check Script 10 outputs - FLEXIBLE: look for tiles either directly OR in grid subdirs @@ -331,7 +231,7 @@ if (dir.exists(tiles_split_base)) { # Try grid-size subdirectories first (5x5, 10x10, etc.) 
- preferred new structure subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - + if (length(grid_patterns) > 0) { # New structure: daily_tiles_split/{grid_size}/{dates}/ grid_dir <- file.path(tiles_split_base, grid_patterns[1]) @@ -359,12 +259,11 @@ cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 run # Check Script 40 outputs (mosaics) - check which weeks are missing (not just current week) # The early check section already identified missing_weeks, so we use that -skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Only skip if NO missing weeks AND not forcing rerun +skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Only skip if NO missing weeks AND not forcing rerun cat(sprintf("Script 40: %d missing week(s) to create\n", nrow(missing_weeks))) # Check Script 80 outputs (KPIs in reports/kpis/{field_level|field_analysis}) -# Use the same kpi_subdir logic to find the right directory -kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) +# kpi_dir already set by check_kpi_completeness() above kpi_files <- if (dir.exists(kpi_dir)) { list.files(kpi_dir, pattern = "\\.csv$|\\.json$") } else { @@ -373,147 +272,157 @@ kpi_files <- if (dir.exists(kpi_dir)) { cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files))) # Determine if scripts should run based on outputs AND client type -skip_10 <- (length(tiles_dates) > 0 && !force_rerun && !force_data_generation) # Force Script 10 if missing weeks detected -skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data -skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion) -skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients -skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients -skip_30 <- FALSE # Script 30 ALWAYS 
runs for all client types -skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients -skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Skip Script 40 only if NO missing weeks -skip_80 <- (kpis_missing_count == 0 && !force_rerun) # Skip Script 80 only if ALL KPIs exist AND not forcing rerun +skip_10 <- (length(tiles_dates) > 0 && !force_rerun && !force_data_generation) # Force Script 10 if missing weeks detected +skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data +skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion) +skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients +skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients +skip_30 <- FALSE # Script 30 ALWAYS runs for all client types +skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients +skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Skip Script 40 only if NO missing weeks +skip_80 <- (kpis_missing_count == 0 && !force_rerun) # Skip Script 80 only if ALL KPIs exist AND not forcing rerun cat("\nSkipping decisions (based on outputs AND client type):\n") -cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP" else "RUN")) +cat(sprintf(" Script 10: %s\n", if (skip_10) "SKIP" else "RUN")) cat(sprintf(" Script 20: RUN (always runs to process new downloads)\n")) -cat(sprintf(" Script 21: %s %s\n", if(skip_21) "SKIP" else "RUN", if(skip_cane_supply_only && !skip_21) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 22: %s %s\n", if(skip_22) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 23: %s %s\n", if(skip_23) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 30: %s (always runs)\n", if(skip_30) "SKIP" else "RUN")) -cat(sprintf(" Script 31: %s %s\n", if(skip_31) "SKIP" else "RUN", if(skip_cane_supply_only) 
"(non-cane_supply client)" else "")) -cat(sprintf(" Script 40: %s (looping through %d missing weeks)\n", if(skip_40) "SKIP" else "RUN", nrow(missing_weeks))) -cat(sprintf(" Script 80: %s (always runs)\n", if(skip_80) "SKIP" else "RUN")) -cat(sprintf(" Script 90: %s %s\n", if(!run_legacy_report) "SKIP" else "RUN", if(run_legacy_report) "(agronomic_support legacy report)" else "")) -cat(sprintf(" Script 91: %s %s\n", if(!run_modern_report) "SKIP" else "RUN", if(run_modern_report) "(cane_supply modern report)" else "")) +cat(sprintf(" Script 21: %s %s\n", if (skip_21) "SKIP" else "RUN", if (skip_cane_supply_only && !skip_21) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 22: %s %s\n", if (skip_22) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 23: %s %s\n", if (skip_23) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 30: %s (always runs)\n", if (skip_30) "SKIP" else "RUN")) +cat(sprintf(" Script 31: %s %s\n", if (skip_31) "SKIP" else "RUN", if (skip_cane_supply_only) "(non-cane_supply client)" else "")) +cat(sprintf(" Script 40: %s (looping through %d missing weeks)\n", if (skip_40) "SKIP" else "RUN", nrow(missing_weeks))) +cat(sprintf(" Script 80: %s (always runs)\n", if (skip_80) "SKIP" else "RUN")) +cat(sprintf(" Script 90: %s %s\n", if (!run_legacy_report) "SKIP" else "RUN", if (run_legacy_report) "(agronomic_support legacy report)" else "")) +cat(sprintf(" Script 91: %s %s\n", if (!run_modern_report) "SKIP" else "RUN", if (run_modern_report) "(cane_supply modern report)" else "")) # ============================================================================== # PYTHON: DOWNLOAD PLANET IMAGES (MISSING DATES ONLY) # ============================================================================== cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n") -tryCatch({ - # Setup paths - base_path <- file.path("laravel_app", 
"storage", "app", project_dir) - merged_tifs_dir <- file.path(base_path, data_source) - - # Get existing dates from raw TIFFs - existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") - existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) - - # Get existing dates from tiles (better indicator of completion for tiled projects) - existing_tile_dates <- tiles_dates - - # For single-file projects, use raw TIFF files as the indicator instead - # This prevents re-downloading data that already exists - if (mosaic_mode == "single-file" && length(existing_tiff_dates) > 0) { - existing_tile_dates <- existing_tiff_dates - } - - # Find missing dates in the window - start_date <- end_date - data_generation_offset - date_seq <- seq(start_date, end_date, by = "day") - target_dates <- format(date_seq, "%Y-%m-%d") - - # Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file) - missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)] - - if (mosaic_mode == "single-file") { - cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates))) - } else { - cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) - } - cat(sprintf(" Missing dates in window: %d\n", length(missing_dates))) - - # Download each missing date - download_count <- 0 - download_failed <- 0 - - if (length(missing_dates) > 0) { - # Save current directory - original_dir <- getwd() - - # Change to python_app directory so relative paths work correctly - setwd("python_app") - - for (date_str in missing_dates) { - cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str) - result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE) - if (result == 0) { - download_count <- download_count + 1 - } else { - download_failed <- download_failed + 1 - } +tryCatch( + { + # Setup paths + base_path <- file.path("laravel_app", "storage", "app", 
project_dir) + merged_tifs_dir <- file.path(base_path, data_source) + + # Get existing dates from raw TIFFs + existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") + existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) + + # Get existing dates from tiles (better indicator of completion for tiled projects) + existing_tile_dates <- tiles_dates + + # For single-file projects, use raw TIFF files as the indicator instead + # This prevents re-downloading data that already exists + if (mosaic_mode == "single-file" && length(existing_tiff_dates) > 0) { + existing_tile_dates <- existing_tiff_dates } - - # Change back to original directory - setwd(original_dir) + + # Find missing dates in the window + start_date <- end_date - data_generation_offset + date_seq <- seq(start_date, end_date, by = "day") + target_dates <- format(date_seq, "%Y-%m-%d") + + # Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file) + missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)] + + if (mosaic_mode == "single-file") { + cat(sprintf(" Existing TIFF dates: %d\n", length(existing_tile_dates))) + } else { + cat(sprintf(" Existing tiled dates: %d\n", length(existing_tile_dates))) + } + cat(sprintf(" Missing dates in window: %d\n", length(missing_dates))) + + # Download each missing date + download_count <- 0 + download_failed <- 0 + + if (length(missing_dates) > 0) { + # Save current directory + original_dir <- getwd() + + # Change to python_app directory so relative paths work correctly + setwd("python_app") + + for (date_str in missing_dates) { + cmd <- sprintf('python 00_download_8band_pu_optimized.py "%s" --date "%s" --resolution 3 --cleanup', project_dir, date_str) + result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE) + if (result == 0) { + download_count <- download_count + 1 + } else { + download_failed <- download_failed + 1 + } + } + + # Change back to original directory + 
setwd(original_dir) + } + + cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed)) + if (download_failed > 0) { + cat("⚠ Some downloads failed, but continuing pipeline\n") + } + + # Force Script 10 to run ONLY if downloads actually succeeded (not just attempted) + if (download_count > 0) { + skip_10 <- FALSE + } + }, + error = function(e) { + cat("✗ Error in planet download:", e$message, "\n") + pipeline_success <<- FALSE } - - cat(sprintf("✓ Downloaded %d dates, %d failed\n", download_count, download_failed)) - if (download_failed > 0) { - cat("⚠ Some downloads failed, but continuing pipeline\n") - } - - # Force Script 10 to run ONLY if downloads actually succeeded (not just attempted) - if (download_count > 0) { - skip_10 <- FALSE - } - -}, error = function(e) { - cat("✗ Error in planet download:", e$message, "\n") - pipeline_success <<- FALSE -}) +) # ============================================================================== # SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs # ============================================================================== if (pipeline_success && !skip_10) { cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n") - tryCatch({ - # CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.) 
- saved_end_date <- end_date - saved_offset <- offset # Use FULL offset for tiling (not dynamic_offset) - saved_project_dir <- project_dir - saved_data_source <- data_source - - # Set environment variables for the script (Script 10 uses these for filtering) - assign("PROJECT", project_dir, envir = .GlobalEnv) - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) # Full reporting window - - # Suppress verbose per-date output, show only summary - sink(nullfile()) - source("r_app/10_create_master_grid_and_split_tiffs.R") - sink() - - # CRITICAL: Restore global variables after sourcing Script 10 - end_date <- saved_end_date - offset <- saved_offset - project_dir <- saved_project_dir - data_source <- saved_data_source - - # Verify output - tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") - if (dir.exists(tiles_dir)) { - subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) - cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs))) - } else { - cat("✓ Script 10 completed\n") + tryCatch( + { + # CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.) 
+ saved_end_date <- end_date + saved_offset <- offset # Use FULL offset for tiling (not dynamic_offset) + saved_project_dir <- project_dir + saved_data_source <- data_source + + # Set environment variables for the script (Script 10 uses these for filtering) + assign("PROJECT", project_dir, envir = .GlobalEnv) + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) # Full reporting window + + # Suppress verbose per-date output, show only summary + sink(nullfile()) + source("r_app/10_create_master_grid_and_split_tiffs.R") + sink() + + # CRITICAL: Restore global variables after sourcing Script 10 + end_date <- saved_end_date + offset <- saved_offset + project_dir <- saved_project_dir + data_source <- saved_data_source + + # Verify output - auto-detect grid size + grid_size <- detect_grid_size(project_dir) + tiles_dir <- if (grid_size != "unknown") { + file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", grid_size) + } else { + file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") + } + if (dir.exists(tiles_dir)) { + subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) + cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs))) + } else { + cat("✓ Script 10 completed\n") + } + }, + error = function(e) { + sink() + cat("✗ Error in Script 10:", e$message, "\n") + pipeline_success <<- FALSE } - }, error = function(e) { - sink() - cat("✗ Error in Script 10:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } else if (skip_10) { cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n") } @@ -523,30 +432,36 @@ if (pipeline_success && !skip_10) { # ============================================================================== if (pipeline_success && !skip_20) { cat("\n========== RUNNING SCRIPT 20: CI EXTRACTION ==========\n") - tryCatch({ - # Run Script 20 via system() to pass command-line args just like from 
terminal - # Arguments: end_date offset project_dir data_source - # Use FULL offset so CI extraction covers entire reporting window (not just new data) - cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"', - format(end_date, "%Y-%m-%d"), offset, project_dir, data_source) - result <- system(cmd) - - if (result != 0) { - stop("Script 20 exited with error code:", result) + tryCatch( + { + # Run Script 20 via system() to pass command-line args just like from terminal + # Arguments: end_date offset project_dir data_source + # Use FULL offset so CI extraction covers entire reporting window (not just new data) + cmd <- sprintf( + '"%s" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"', + RSCRIPT_PATH, + format(end_date, "%Y-%m-%d"), offset, project_dir, data_source + ) + result <- system(cmd) + + if (result != 0) { + stop("Script 20 exited with error code:", result) + } + + # Verify CI output was created + ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") + if (dir.exists(ci_daily_dir)) { + files <- list.files(ci_daily_dir, pattern = "\\.rds$") + cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files))) + } else { + cat("✓ Script 20 completed\n") + } + }, + error = function(e) { + cat("✗ Error in Script 20:", e$message, "\n") + pipeline_success <<- FALSE } - - # Verify CI output was created - ci_daily_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") - if (dir.exists(ci_daily_dir)) { - files <- list.files(ci_daily_dir, pattern = "\\.rds$") - cat(sprintf("✓ Script 20 completed - generated %d CI files\n", length(files))) - } else { - cat("✓ Script 20 completed\n") - } - }, error = function(e) { - cat("✗ Error in Script 20:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } else if (skip_20) { cat("\n========== SKIPPING SCRIPT 20 (CI already extracted) 
==========\n") } @@ -556,27 +471,30 @@ if (pipeline_success && !skip_20) { # ============================================================================== if (pipeline_success && !skip_21) { cat("\n========== RUNNING SCRIPT 21: CONVERT CI RDS TO CSV ==========\n") - tryCatch({ - # Set environment variables for the script - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) - assign("project_dir", project_dir, envir = .GlobalEnv) - - source("r_app/21_convert_ci_rds_to_csv.R") - main() # Call main() to execute the script with the environment variables - - # Verify CSV output was created - ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir, "ci_extracted") - if (dir.exists(ci_csv_path)) { - csv_files <- list.files(ci_csv_path, pattern = "\\.csv$") - cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files))) - } else { - cat("✓ Script 21 completed\n") + tryCatch( + { + # Set environment variables for the script + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) + assign("project_dir", project_dir, envir = .GlobalEnv) + + source("r_app/21_convert_ci_rds_to_csv.R") + main() # Call main() to execute the script with the environment variables + + # Verify CSV output was created + ci_csv_path <- file.path("laravel_app", "storage", "app", project_dir, "ci_extracted") + if (dir.exists(ci_csv_path)) { + csv_files <- list.files(ci_csv_path, pattern = "\\.csv$") + cat(sprintf("✓ Script 21 completed - converted to %d CSV files\n", length(csv_files))) + } else { + cat("✓ Script 21 completed\n") + } + }, + error = function(e) { + cat("✗ Error in Script 21:", e$message, "\n") + pipeline_success <<- FALSE } - }, error = function(e) { - cat("✗ Error in Script 21:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } else if (skip_21) { cat("\n========== SKIPPING SCRIPT 21 (CSV already created) ==========\n") } @@ -586,30 +504,36 @@ if 
(pipeline_success && !skip_21) { # ============================================================================== if (pipeline_success && !skip_30) { cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") - tryCatch({ - # Run Script 30 via system() to pass command-line args just like from terminal - # Script 30 expects: project_dir data_source as arguments - # Pass the same data_source that Script 20 is using - cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s" "%s"', - project_dir, data_source_used) - result <- system(cmd) - - if (result != 0) { - stop("Script 30 exited with error code:", result) + tryCatch( + { + # Run Script 30 via system() to pass command-line args just like from terminal + # Script 30 expects: project_dir data_source as arguments + # Pass the same data_source that Script 20 is using + cmd <- sprintf( + '"%s" --vanilla r_app/30_interpolate_growth_model.R "%s" "%s"', + RSCRIPT_PATH, + project_dir, data_source_used + ) + result <- system(cmd) + + if (result != 0) { + stop("Script 30 exited with error code:", result) + } + + # Verify interpolated output + growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") + if (dir.exists(growth_dir)) { + files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$") + cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files))) + } else { + cat("✓ Script 30 completed\n") + } + }, + error = function(e) { + cat("✗ Error in Script 30:", e$message, "\n") + pipeline_success <<- FALSE } - - # Verify interpolated output - growth_dir <- file.path("laravel_app", "storage", "app", project_dir, "growth_model_interpolated") - if (dir.exists(growth_dir)) { - files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$") - cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files))) - } else { - cat("✓ Script 30 completed\n") - } - }, 
error = function(e) { - cat("✗ Error in Script 30:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } # ============================================================================== @@ -617,33 +541,36 @@ if (pipeline_success && !skip_30) { # ============================================================================== if (pipeline_success && !skip_31) { cat("\n========== RUNNING PYTHON 31: HARVEST IMMINENT WEEKLY ==========\n") - tryCatch({ - # Run Python script in pytorch_gpu conda environment - # Script expects positional project name (not --project flag) - # Run from smartcane root so conda can find the environment - cmd <- sprintf('conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s', project_dir) - cat("DEBUG: Running command:", cmd, "\n") - result <- system(cmd) - - if (result == 0) { - # Verify harvest output - check for THIS WEEK's specific file - current_week <- as.numeric(format(end_date, "%V")) - current_year <- as.numeric(format(end_date, "%Y")) - expected_file <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) - - if (file.exists(expected_file)) { - cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", current_week)) + tryCatch( + { + # Run Python script in pytorch_gpu conda environment + # Script expects positional project name (not --project flag) + # Run from smartcane root so conda can find the environment + cmd <- sprintf("conda run -n pytorch_gpu python python_app/31_harvest_imminent_weekly.py %s", project_dir) + result <- system(cmd) + + if (result == 0) { + # Verify harvest output - check for THIS WEEK's specific file + wwy_current_31 <- get_iso_week_year(end_date) + expected_file <- file.path( + "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, 
wwy_current_31$week, wwy_current_31$year) + ) + + if (file.exists(expected_file)) { + cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", wwy_current_31$week)) + } else { + cat("✓ Script 31 completed (check if harvest.xlsx is available)\n") + } } else { - cat("✓ Script 31 completed (check if harvest.xlsx is available)\n") + cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n") } - } else { - cat("⚠ Script 31 completed with errors (check harvest.xlsx availability)\n") + }, + error = function(e) { + setwd(original_dir) + cat("⚠ Script 31 error:", e$message, "\n") } - }, error = function(e) { - setwd(original_dir) - cat("⚠ Script 31 error:", e$message, "\n") - }) + ) } else if (skip_31) { cat("\n========== SKIPPING SCRIPT 31 (non-cane_supply client type) ==========\n") } @@ -653,62 +580,70 @@ if (pipeline_success && !skip_31) { # ============================================================================== if (pipeline_success && !skip_40) { cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n") - + # If there are missing weeks, process them one at a time if (nrow(missing_weeks) > 0) { cat(sprintf("Found %d missing week(s) - running Script 40 once per week\n\n", nrow(missing_weeks))) - + # Loop through missing weeks in reverse chronological order (oldest first) for (week_idx in nrow(missing_weeks):1) { missing_week <- missing_weeks[week_idx, ] week_num <- missing_week$week year_num <- missing_week$year week_end_date <- as.Date(missing_week$week_end_date) - - cat(sprintf("--- Creating mosaic for week %02d/%d (ending %s) ---\n", - week_num, year_num, format(week_end_date, "%Y-%m-%d"))) - - tryCatch({ - # Run Script 40 with offset=7 (one week only) for this specific week - # The end_date is the last day of the week, and offset=7 covers the full 7-day week - # IMPORTANT: Pass data_source so Script 40 uses the correct folder (not auto-detect which can be wrong) - cmd <- sprintf('"C:\\Program 
Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" 7 "%s" "" "%s"', - format(week_end_date, "%Y-%m-%d"), project_dir, data_source) - result <- system(cmd) - - if (result != 0) { - stop("Script 40 exited with error code:", result) - } - - # Verify mosaic was created for this specific week - mosaic_created <- FALSE - if (mosaic_mode == "tiled") { - mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") - if (dir.exists(mosaic_dir)) { - week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) - mosaic_files <- list.files(mosaic_dir, pattern = week_pattern) - mosaic_created <- length(mosaic_files) > 0 + + cat(sprintf( + "--- Creating mosaic for week %02d/%d (ending %s) ---\n", + week_num, year_num, format(week_end_date, "%Y-%m-%d") + )) + + tryCatch( + { + # Run Script 40 with offset=7 (one week only) for this specific week + # The end_date is the last day of the week, and offset=7 covers the full 7-day week + # IMPORTANT: Pass data_source so Script 40 uses the correct folder (not auto-detect which can be wrong) + cmd <- sprintf( + '"%s" --vanilla r_app/40_mosaic_creation.R "%s" 7 "%s" "" "%s"', + RSCRIPT_PATH, + format(week_end_date, "%Y-%m-%d"), project_dir, data_source + ) + result <- system(cmd) + + if (result != 0) { + stop("Script 40 exited with error code:", result) } - } else { - mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") - if (dir.exists(mosaic_dir)) { - week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) - mosaic_files <- list.files(mosaic_dir, pattern = week_pattern) - mosaic_created <- length(mosaic_files) > 0 + + # Verify mosaic was created for this specific week + mosaic_created <- FALSE + if (mosaic_mode == "tiled") { + mosaic_dir <- get_mosaic_dir(project_dir, mosaic_mode = "tiled") + if (dir.exists(mosaic_dir)) { + week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) + mosaic_files <- 
list.files(mosaic_dir, pattern = week_pattern) + mosaic_created <- length(mosaic_files) > 0 + } + } else { + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(mosaic_dir)) { + week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) + mosaic_files <- list.files(mosaic_dir, pattern = week_pattern) + mosaic_created <- length(mosaic_files) > 0 + } } + + if (mosaic_created) { + cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num)) + } else { + cat(sprintf("✓ Week %02d/%d processing completed (verify output)\n\n", week_num, year_num)) + } + }, + error = function(e) { + cat(sprintf("✗ Error creating mosaic for week %02d/%d: %s\n", week_num, year_num, e$message), "\n") + pipeline_success <<- FALSE } - - if (mosaic_created) { - cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num)) - } else { - cat(sprintf("✓ Week %02d/%d processing completed (verify output)\n\n", week_num, year_num)) - } - }, error = function(e) { - cat(sprintf("✗ Error creating mosaic for week %02d/%d: %s\n", week_num, year_num, e$message), "\n") - pipeline_success <<- FALSE - }) + ) } - + if (pipeline_success) { cat(sprintf("✓ Script 40 completed - created all %d missing week mosaics\n", nrow(missing_weeks))) } @@ -725,54 +660,67 @@ if (pipeline_success && !skip_40) { # ============================================================================== if (pipeline_success && !skip_80) { cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs FOR REPORTING WINDOW ==========\n") - + # Build list of weeks that NEED calculation (missing KPIs) - weeks_to_calculate <- kpis_needed[!kpis_needed$has_kpis, ] # Only weeks WITHOUT KPIs - + weeks_to_calculate <- kpis_needed[!kpis_needed$has_kpis, ] # Only weeks WITHOUT KPIs + if (nrow(weeks_to_calculate) > 0) { # Sort by date (oldest to newest) for sequential processing weeks_to_calculate <- weeks_to_calculate[order(weeks_to_calculate$date), ] - - 
cat(sprintf("Looping through %d missing week(s) in reporting window (from %s back to %s):\n\n", - nrow(weeks_to_calculate), - format(max(weeks_to_calculate$date), "%Y-%m-%d"), - format(min(weeks_to_calculate$date), "%Y-%m-%d"))) - - tryCatch({ - for (week_idx in 1:nrow(weeks_to_calculate)) { - week_row <- weeks_to_calculate[week_idx, ] - calc_date <- week_row$date - - # Run Script 80 for this specific week with offset=7 (one week only) - # This ensures Script 80 calculates KPIs for THIS week with proper trend data - cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" "%s" %d', - format(calc_date, "%Y-%m-%d"), project_dir, 7) # offset=7 for single week - - cat(sprintf(" [Week %02d/%d] Running Script 80 with end_date=%s...\n", - week_row$week, week_row$year, format(calc_date, "%Y-%m-%d"))) - - result <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) - - if (result == 0) { - cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year)) - } else { - cat(sprintf(" ✗ Error calculating KPIs for week %02d/%d (exit code: %d)\n", - week_row$week, week_row$year, result)) + + cat(sprintf( + "Looping through %d missing week(s) in reporting window (from %s back to %s):\n\n", + nrow(weeks_to_calculate), + format(max(weeks_to_calculate$date), "%Y-%m-%d"), + format(min(weeks_to_calculate$date), "%Y-%m-%d") + )) + + tryCatch( + { + for (week_idx in 1:nrow(weeks_to_calculate)) { + week_row <- weeks_to_calculate[week_idx, ] + calc_date <- week_row$date + + # Run Script 80 for this specific week with offset=7 (one week only) + # This ensures Script 80 calculates KPIs for THIS week with proper trend data + cmd <- sprintf( + '"%s" --vanilla r_app/80_calculate_kpis.R "%s" "%s" %d', + RSCRIPT_PATH, + format(calc_date, "%Y-%m-%d"), project_dir, 7 + ) # offset=7 for single week + + cat(sprintf( + " [Week %02d/%d] Running Script 80 with end_date=%s...\n", + week_row$week, week_row$year, 
format(calc_date, "%Y-%m-%d") + )) + + result <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) + + if (result == 0) { + cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year)) + } else { + cat(sprintf( + " ✗ Error calculating KPIs for week %02d/%d (exit code: %d)\n", + week_row$week, week_row$year, result + )) + } } + + # Verify total KPI output (kpi_dir defined by check_kpi_completeness() earlier) + if (dir.exists(kpi_dir)) { + files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") + # Extract subdir name from kpi_dir path for display + subdir_name <- basename(kpi_dir) + cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), subdir_name)) + } else { + cat("\n✓ Script 80 loop completed\n") + } + }, + error = function(e) { + cat("✗ Error in Script 80 loop:", e$message, "\n") + pipeline_success <<- FALSE } - - # Verify total KPI output - kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) - if (dir.exists(kpi_dir)) { - files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") - cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), kpi_subdir)) - } else { - cat("\n✓ Script 80 loop completed\n") - } - }, error = function(e) { - cat("✗ Error in Script 80 loop:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } else { cat(sprintf("✓ All %d weeks already have KPIs - skipping calculation\n", nrow(kpis_needed))) } @@ -792,11 +740,11 @@ if (dir.exists(kpi_dir)) { check_date <- end_date - (weeks_back * 7) week_num <- as.numeric(format(check_date, "%V")) year_num <- as.numeric(format(check_date, "%G")) - + # Check for any KPI file from that week week_pattern <- sprintf("week%02d_%d", week_num, year_num) kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern) - + if (length(kpi_files_this_week) == 0) { kpis_complete <- FALSE cat(sprintf(" Week %02d/%d: ✗ KPIs not found\n", week_num, year_num)) @@ -815,40 
+763,45 @@ if (kpis_complete) { # ============================================================================== if (pipeline_success && run_legacy_report) { cat("\n========== RUNNING SCRIPT 90: LEGACY WORD REPORT ==========\n") - + if (!kpis_complete) { cat("⚠ Skipping Script 90 - KPIs not available for full reporting window\n") } else { - tryCatch({ - # Script 90 is an RMarkdown file - compile it with rmarkdown::render() - output_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports") - - # Ensure output directory exists - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + tryCatch( + { + # Script 90 is an RMarkdown file - compile it with rmarkdown::render() + output_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports") + + # Ensure output directory exists + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + } + + output_filename <- sprintf( + "CI_report_week%02d_%d.docx", + as.numeric(format(end_date, "%V")), + as.numeric(format(end_date, "%G")) + ) + + # Render the RMarkdown document + rmarkdown::render( + input = "r_app/90_CI_report_with_kpis_simple.Rmd", + output_dir = output_dir, + output_file = output_filename, + params = list( + report_date = format(end_date, "%Y-%m-%d"), + data_dir = project_dir + ), + quiet = TRUE + ) + + cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", output_filename)) + }, + error = function(e) { + cat("✗ Error in Script 90:", e$message, "\n") + pipeline_success <<- FALSE } - - output_filename <- sprintf("CI_report_week%02d_%d.docx", - as.numeric(format(end_date, "%V")), - as.numeric(format(end_date, "%G"))) - - # Render the RMarkdown document - rmarkdown::render( - input = "r_app/90_CI_report_with_kpis_simple.Rmd", - output_dir = output_dir, - output_file = output_filename, - params = list( - report_date = format(end_date, "%Y-%m-%d"), - data_dir = project_dir - ), - quiet = TRUE - ) - 
- cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", output_filename)) - }, error = function(e) { - cat("✗ Error in Script 90:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } } else if (run_legacy_report) { cat("\n========== SKIPPING SCRIPT 90 (pipeline error or KPIs incomplete) ==========\n") @@ -859,40 +812,45 @@ if (pipeline_success && run_legacy_report) { # ============================================================================== if (pipeline_success && run_modern_report) { cat("\n========== RUNNING SCRIPT 91: MODERN WORD REPORT ==========\n") - + if (!kpis_complete) { cat("⚠ Skipping Script 91 - KPIs not available for full reporting window\n") } else { - tryCatch({ - # Script 91 is an RMarkdown file - compile it with rmarkdown::render() - output_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports") - - # Ensure output directory exists - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + tryCatch( + { + # Script 91 is an RMarkdown file - compile it with rmarkdown::render() + output_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports") + + # Ensure output directory exists + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + } + + output_filename <- sprintf( + "CI_report_week%02d_%d.docx", + as.numeric(format(end_date, "%V")), + as.numeric(format(end_date, "%G")) + ) + + # Render the RMarkdown document + rmarkdown::render( + input = "r_app/91_CI_report_with_kpis_Angata.Rmd", + output_dir = output_dir, + output_file = output_filename, + params = list( + report_date = format(end_date, "%Y-%m-%d"), + data_dir = project_dir + ), + quiet = TRUE + ) + + cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", output_filename)) + }, + error = function(e) { + cat("✗ Error in Script 91:", e$message, "\n") + pipeline_success <<- FALSE } - - output_filename <- sprintf("CI_report_week%02d_%d.docx", - 
as.numeric(format(end_date, "%V")), - as.numeric(format(end_date, "%G"))) - - # Render the RMarkdown document - rmarkdown::render( - input = "r_app/91_CI_report_with_kpis_Angata.Rmd", - output_dir = output_dir, - output_file = output_filename, - params = list( - report_date = format(end_date, "%Y-%m-%d"), - data_dir = project_dir - ), - quiet = TRUE - ) - - cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", output_filename)) - }, error = function(e) { - cat("✗ Error in Script 91:", e$message, "\n") - pipeline_success <<- FALSE - }) + ) } } else if (run_modern_report) { cat("\n========== SKIPPING SCRIPT 91 (pipeline error or KPIs incomplete) ==========\n")