separate scripts work for angata, except for the word doc.

This commit is contained in:
Timon 2026-02-09 20:34:11 +01:00
parent 3ee3f9e31c
commit bfd56ccd16
7 changed files with 250 additions and 156 deletions

View file

@ -19,13 +19,15 @@
# - Naming: Per-field GeoTIFFs organized by field and date # - Naming: Per-field GeoTIFFs organized by field and date
# #
# USAGE: # USAGE:
# Rscript 10_create_per_field_tiffs.R [project] # Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset]
# #
# Example (Windows PowerShell): # Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
# #
# PARAMETERS: # PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba # - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
# - end_date: End date for processing (YYYY-MM-DD format, default: today)
# - offset: Days to look back (numeric, default: 7)
# #
# CLIENT TYPES: # CLIENT TYPES:
# - cane_supply (ANGATA): Yes - primary data organization script # - cane_supply (ANGATA): Yes - primary data organization script
@ -70,10 +72,16 @@ main <- function() {
# STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R) # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) == 0) "angata" else args[1]
# Make project_dir available to sourced files (they execute in global scope) # Parse arguments: [project] [end_date] [offset]
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date()
offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
# Make variables available to sourced files (they execute in global scope)
assign("project_dir", project_dir, envir = .GlobalEnv) assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date", end_date_arg, envir = .GlobalEnv)
assign("offset", offset_arg, envir = .GlobalEnv)
# STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined) # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.) # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
@ -97,7 +105,7 @@ main <- function() {
# Window: end_date - offset days to end_date # Window: end_date - offset days to end_date
# Always coerce to correct types to avoid issues with lingering/inherited values # Always coerce to correct types to avoid issues with lingering/inherited values
if (!exists("end_date") || !inherits(end_date, "Date")) { if (!exists("end_date") || !inherits(end_date, "Date")) {
end_date <- as.Date("2026-02-04") end_date <- Sys.Date()
safe_log(paste("Using default end_date:", end_date), "INFO") safe_log(paste("Using default end_date:", end_date), "INFO")
} }
if (!exists("offset") || !is.numeric(offset)) { if (!exists("offset") || !is.numeric(offset)) {

View file

@ -127,91 +127,96 @@ main <- function() {
} }
} }
# Process each DATE (OPTIMIZED: load TIFF once, process all fields) # Process each DATE (load merged TIFF once, extract all fields from it)
total_success <- 0 total_success <- 0
total_error <- 0 total_error <- 0
ci_results_by_date <- list()
for (date_str in dates_filter) { for (date_str in dates_filter) {
# Load the merged TIFF ONCE for this date # Load the MERGED TIFF (farm-wide) ONCE for this date
merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str)) input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
# Find the actual TIFF path (it's in the first field that has it) if (!file.exists(input_tif_merged)) {
input_tif_full <- NULL safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
for (field in fields) { total_error <<- total_error + 1
candidate_path <- file.path(setup$field_tiles_dir, field, sprintf("%s.tif", date_str))
if (file.exists(candidate_path)) {
input_tif_full <- candidate_path
break
}
}
if (is.null(input_tif_full)) {
safe_log(sprintf(" %s: Input TIFF not found (skipping)", date_str))
next next
} }
tryCatch({ tryCatch({
# Load TIFF ONCE # Load 4-band TIFF ONCE
raster_4band <- terra::rast(input_tif_full) raster_4band <- terra::rast(input_tif_merged)
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
# Calculate CI from 4-band
ci_raster <- calc_ci_from_raster(raster_4band)
# Create 5-band (R, G, B, NIR, CI)
five_band <- c(raster_4band, ci_raster)
# Now process all fields from this single merged TIFF
fields_processed_this_date <- 0
# Now process all fields from this single TIFF
for (field in fields) { for (field in fields) {
field_ci_path <- file.path(setup$field_tiles_ci_dir, field) field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field) field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
# Pre-create output directories
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str)) output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
# MODE 3: Skip if both outputs already exist # MODE 3: Skip if both outputs already exist
if (file.exists(output_tif) && file.exists(output_rds)) { if (file.exists(output_tif) && file.exists(output_rds)) {
next # Skip to next field next
} }
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists # MODE 2: Regeneration mode - RDS missing but CI TIFF exists
if (file.exists(output_tif) && !file.exists(output_rds)) { if (file.exists(output_tif) && !file.exists(output_rds)) {
tryCatch({ tryCatch({
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field) extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
total_success <<- total_success + 1 fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) { }, error = function(e) {
total_error <<- total_error + 1 # Continue to next field
}) })
next next
} }
# MODE 1: Normal mode - calculate CI from 4-band input # MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
tryCatch({ tryCatch({
# Calculate CI # Crop 5-band TIFF to field boundary
ci_raster <- calc_ci_from_raster(raster_4band) field_geom <- field_boundaries_sf %>% filter(field == !!field)
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
# Create 5-band TIFF (R, G, B, NIR, CI) # Save 5-band field TIFF
five_band <- c(raster_4band, ci_raster) terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
# Save 5-band TIFF # Extract CI statistics by sub_field (from cropped CI raster)
terra::writeRaster(five_band, output_tif, overwrite = TRUE) ci_cropped <- five_band_cropped[[5]] # 5th band is CI
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
# Extract CI statistics by sub_field
ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field)
# Save RDS # Save RDS
if (!is.null(ci_stats) && nrow(ci_stats) > 0) { if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
saveRDS(ci_stats, output_rds) saveRDS(ci_stats, output_rds)
# Store for daily aggregation
ci_stats_with_date <- ci_stats %>% mutate(date = date_str)
key <- sprintf("%s_%s", field, date_str)
ci_results_by_date[[key]] <<- ci_stats_with_date
} }
total_success <<- total_success + 1 fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) { }, error = function(e) {
total_error <<- total_error + 1 # Error in individual field, continue to next
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
}) })
} }
# Increment success counter if at least one field succeeded
if (fields_processed_this_date > 0) {
total_success <<- total_success + 1
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
}
}, error = function(e) { }, error = function(e) {
safe_log(sprintf(" %s: ✗ Error loading TIFF - %s", date_str, e$message), "ERROR")
total_error <<- total_error + 1 total_error <<- total_error + 1
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
}) })
} }

View file

@ -4,13 +4,22 @@
# =================== # ===================
# Utility functions for growth model interpolation and manipulation. # Utility functions for growth model interpolation and manipulation.
# These functions support the creation of continuous growth models from point measurements. # These functions support the creation of continuous growth models from point measurements.
#
# PERFORMANCE OPTIMIZATION:
# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr()
# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields)
# - Dynamic CPU detection: Allocates workers based on available cores
# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support
#' Load and prepare the combined CI data (Per-Field Architecture) #' Load and prepare the combined CI data (Per-Field Architecture)
#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges)
#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr()
#' #'
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals) #' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup)
#' @return Long-format dataframe with CI values by date and field #' @return Long-format dataframe with CI values by date and field
#' #'
load_combined_ci_data <- function(daily_vals_dir) { load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals # For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds # Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
@ -20,6 +29,17 @@ load_combined_ci_data <- function(daily_vals_dir) {
safe_log(paste("Loading per-field CI data from:", daily_vals_dir)) safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
# OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates
date_filter_min <- NULL
date_filter_max <- NULL
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE)
date_filter_max <- max(harvesting_data$season_end, na.rm = TRUE)
safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s",
format(date_filter_min, "%Y-%m-%d"),
format(date_filter_max, "%Y-%m-%d")))
}
# Find all daily RDS files recursively (per-field structure) # Find all daily RDS files recursively (per-field structure)
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories # IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
all_daily_files <- list.files( all_daily_files <- list.files(
@ -37,71 +57,87 @@ load_combined_ci_data <- function(daily_vals_dir) {
stop(paste("No per-field daily RDS files found in:", daily_vals_dir)) stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
} }
safe_log(sprintf("Found %d per-field daily RDS files to load (filtered from legacy format)", length(all_daily_files))) safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files)))
# Rebuild with explicit date and field tracking # OPTIMIZATION: Filter files by filename date BEFORE parallel loading
# File structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds # Skip files outside harvest season (can save 60-80% of I/O on large datasets)
combined_long <- data.frame() if (!is.null(date_filter_min) && !is.null(date_filter_max)) {
all_daily_files <- all_daily_files[
{
dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d")
!is.na(dates) & dates >= date_filter_min & dates <= date_filter_max
}
]
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
}
for (file in all_daily_files) { # Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
tryCatch({ # Automatically detect available cores and limit to reasonable number
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
future::plan(strategy = future::multisession, workers = n_cores)
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
# Parallel file reading: future_map_dfr processes each file in parallel
# Returns combined dataframe directly (no need to rbind)
combined_long <- furrr::future_map_dfr(
all_daily_files,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(file) {
# Extract date from filename: {YYYY-MM-DD}.rds # Extract date from filename: {YYYY-MM-DD}.rds
filename <- basename(file) filename <- basename(file)
date_str <- tools::file_path_sans_ext(filename) date_str <- tools::file_path_sans_ext(filename)
# Parse date - handle various formats # Parse date
parsed_date <- NA
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) { if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
parsed_date <- as.Date(date_str, format = "%Y-%m-%d") parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
} else { } else {
safe_log(sprintf("Warning: Could not parse date from filename: %s", filename), "WARNING") return(data.frame()) # Return empty dataframe if parse fails
next
} }
if (is.na(parsed_date)) { if (is.na(parsed_date)) {
safe_log(sprintf("Warning: Invalid date parsed from: %s", filename), "WARNING") return(data.frame())
next
} }
# Read RDS file # Read RDS file
rds_data <- tryCatch({ tryCatch({
readRDS(file) rds_data <- readRDS(file)
if (is.null(rds_data) || nrow(rds_data) == 0) {
return(data.frame())
}
# Add date column to the data
rds_data %>%
dplyr::mutate(Date = parsed_date)
}, error = function(e) { }, error = function(e) {
safe_log(sprintf("Error reading RDS file %s: %s", file, e$message), "WARNING") return(data.frame()) # Return empty dataframe on error
return(NULL)
}) })
}
if (is.null(rds_data) || nrow(rds_data) == 0) { )
next
} # Return to sequential processing to avoid nested parallelism
future::plan(future::sequential)
# Add date column to the data
rds_data <- rds_data %>%
dplyr::mutate(Date = parsed_date)
combined_long <- rbind(combined_long, rds_data)
}, error = function(e) {
safe_log(sprintf("Error processing file %s: %s", file, e$message), "WARNING")
})
}
if (nrow(combined_long) == 0) { if (nrow(combined_long) == 0) {
safe_log("Warning: No valid CI data loaded from daily files", "WARNING") safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
return(data.frame()) return(data.frame())
} }
# OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets)
# Reshape to long format using ci_mean as the main CI value # Reshape to long format using ci_mean as the main CI value
# Only keep rows where ci_mean has valid data DT <- data.table::as.data.table(combined_long)
pivot_stats_long <- combined_long %>% DT <- DT[, .(field, sub_field, ci_mean, Date)]
dplyr::select(field, sub_field, ci_mean, Date) %>% DT[, c("value") := list(as.numeric(ci_mean))]
dplyr::rename(value = ci_mean) %>% DT[, ci_mean := NULL]
dplyr::mutate(value = as.numeric(value)) %>%
# Keep rows even if ci_mean is NA or 0 (might be valid), but drop if Date is missing # Fast filtering without .distinct() (which is slow on large datasets)
tidyr::drop_na(Date) %>% # Keep rows where Date is valid, field/sub_field exist, and value is finite
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)]
dplyr::filter(!is.infinite(value)) %>%
dplyr::distinct() # Convert back to tibble for compatibility with rest of pipeline
pivot_stats_long <- dplyr::as_tibble(DT)
safe_log(sprintf("Loaded %d CI data points from %d daily files", safe_log(sprintf("Loaded %d CI data points from %d daily files",
nrow(pivot_stats_long), length(all_daily_files))) nrow(pivot_stats_long), length(all_daily_files)))
@ -194,6 +230,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
} }
#' Generate interpolated CI data for all fields and seasons #' Generate interpolated CI data for all fields and seasons
#' PARALLELIZE: Processes fields in parallel using furrr::future_map_df()
#' #'
#' @param years Vector of years to process #' @param years Vector of years to process
#' @param harvesting_data Dataframe with harvesting information #' @param harvesting_data Dataframe with harvesting information
@ -227,40 +264,50 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
return(data.frame()) return(data.frame())
} }
# Initialize progress bar for this year
total_fields <<- total_fields + length(valid_sub_fields) total_fields <<- total_fields + length(valid_sub_fields)
pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50) safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
counter <- 0
# Extract and interpolate data for each valid field with progress bar # Set up parallel future plan for field interpolation
result_list <- list() # Allocate 1 core per ~100 fields (with minimum 2 cores)
for (field in valid_sub_fields) { n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
counter <- counter + 1 future::plan(strategy = future::multisession, workers = n_cores)
setTxtProgressBar(pb, counter)
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
# Call with verbose=FALSE to suppress warnings during progress bar iteration result_list <- furrr::future_map(
field_result <- extract_CI_data(field, valid_sub_fields,
harvesting_data = harvesting_data, .progress = TRUE,
field_CI_data = ci_data, .options = furrr::furrr_options(seed = TRUE),
season = yr, function(field) {
verbose = FALSE) # Call with verbose=FALSE to suppress warnings during parallel iteration
extract_CI_data(field,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr,
verbose = FALSE)
}
)
# Return to sequential processing
future::plan(future::sequential)
# Process results and tracking
for (i in seq_along(result_list)) {
field_result <- result_list[[i]]
field_name <- valid_sub_fields[i]
if (nrow(field_result) > 0) { if (nrow(field_result) > 0) {
successful_fields <<- successful_fields + 1 successful_fields <<- successful_fields + 1
result_list[[field]] <- field_result
} else { } else {
# Track failed field
failed_fields[[length(failed_fields) + 1]] <<- list( failed_fields[[length(failed_fields) + 1]] <<- list(
field = field, field = field_name,
season = yr, season = yr,
reason = "Unable to generate interpolated data" reason = "Unable to generate interpolated data"
) )
} }
} }
close(pb)
cat("\n") # Newline after progress bar
# Combine all results for this year # Combine all results for this year
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
if (length(result_list) > 0) { if (length(result_list) > 0) {
purrr::list_rbind(result_list) purrr::list_rbind(result_list)
} else { } else {

View file

@ -60,6 +60,12 @@ suppressPackageStartupMessages({
library(tidyverse) # For dplyr (data wrangling, grouping, mutating) library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction) library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases) library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
# Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
library(future) # For setting up parallel execution plans
library(furrr) # For future_map_dfr (parallel file I/O and field processing)
library(parallel) # For detectCores (automatic CPU detection)
library(data.table) # For fast filtering on large datasets
}) })
# ============================================================================= # =============================================================================
@ -110,23 +116,24 @@ main <- function() {
safe_log("Starting CI growth model interpolation") safe_log("Starting CI growth model interpolation")
# Set up data directory paths
data_dir <- setup$data_dir
# Load and process the data # Load and process the data
tryCatch({ tryCatch({
# Load the combined CI data (created by Script 20 per-field) # Load the combined CI data (created by Script 20 per-field)
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds # Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
CI_data <- load_combined_ci_data(daily_vals_dir) # OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files)
# Load harvesting data from harvest.xlsx for growth model phase assignment
# Use the centralized load_harvesting_data() function which handles NA season_end values
# by setting them to Sys.Date() (field is still in current growing season)
data_dir <- setup$data_dir
harvesting_data <- tryCatch({ harvesting_data <- tryCatch({
load_harvesting_data(data_dir) load_harvesting_data(data_dir)
}, error = function(e) { }, error = function(e) {
safe_log(paste("Error loading harvest data:", e$message), "WARNING") safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING")
NULL NULL
}) })
# Load CI data with date range pre-filtering
CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)
# Validate harvesting data # Validate harvesting data
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
safe_log("No harvesting data available", "ERROR") safe_log("No harvesting data available", "ERROR")

View file

@ -139,6 +139,7 @@ suppressPackageStartupMessages({
library(readr) # For reading CSV files (harvest predictions from Python) library(readr) # For reading CSV files (harvest predictions from Python)
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping) library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
library(writexl) # For writing Excel outputs (KPI summary tables) library(writexl) # For writing Excel outputs (KPI summary tables)
library(progress) # For progress bars during field processing
# ML/Analysis (optional - only for harvest model inference) # ML/Analysis (optional - only for harvest model inference)
tryCatch({ tryCatch({
@ -573,8 +574,10 @@ main <- function() {
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# Load weekly harvest probabilities from script 31 (if available) # Load weekly harvest probabilities from script 31 (if available)
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
message("\n4. Loading harvest probabilities from script 31...") message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats", harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year)) sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
message(paste(" Looking for:", harvest_prob_file)) message(paste(" Looking for:", harvest_prob_file))
@ -846,7 +849,7 @@ main <- function() {
total_acreage = sum(field_data$Acreage, na.rm = TRUE), total_acreage = sum(field_data$Acreage, na.rm = TRUE),
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4), mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
week = current_week, week = current_week,
year = year, year = year,
date = as.character(end_date) date = as.character(end_date)

View file

@ -605,7 +605,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
NULL NULL
} }
output_subdir <- file.path(reports_dir, "kpis", "field_analysis") output_subdir <- file.path(reports_dir, "field_analysis")
if (!dir.exists(output_subdir)) { if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE) dir.create(output_subdir, recursive = TRUE)
} }
@ -637,7 +637,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
) )
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename) rds_path <- file.path(reports_dir, rds_filename)
saveRDS(kpi_data, rds_path) saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path)) message(paste("✓ Field analysis RDS exported to:", rds_path))
@ -683,8 +683,16 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num)) message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
results_list <- list() results_list <- list()
# Initialize progress bar
pb <- progress::progress_bar$new(
format = " [:bar] :percent | Field :current/:total",
total = length(per_field_files),
width = 60
)
# Process each field's mosaic # Process each field's mosaic
for (field_idx in seq_along(per_field_files)) { for (field_idx in seq_along(per_field_files)) {
pb$tick() # Update progress bar
field_name <- names(per_field_files)[field_idx] field_name <- names(per_field_files)[field_idx]
field_file <- per_field_files[[field_name]] field_file <- per_field_files[[field_name]]
@ -751,8 +759,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
message(paste(" Field", field_idx, "of", length(per_field_files), "processed"))
}, error = function(e) { }, error = function(e) {
message(paste(" [ERROR] Field", field_name, ":", e$message)) message(paste(" [ERROR] Field", field_name, ":", e$message))
}) })
@ -773,7 +779,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
mosaic_dir, reports_dir, report_date = Sys.Date()) { mosaic_dir, reports_dir, report_date = Sys.Date()) {
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
message(paste("Loading cached statistics from:", basename(rds_path))) message(paste("Loading cached statistics from:", basename(rds_path)))
@ -783,7 +789,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date) stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)
output_dir <- file.path(reports_dir, "kpis", "field_stats") output_dir <- file.path(reports_dir, "field_stats")
if (!dir.exists(output_dir)) { if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
} }
@ -812,7 +818,7 @@ load_historical_field_data <- function(project_dir, current_week, current_year,
target_year <- target$year target_year <- target$year
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) csv_path <- file.path(reports_dir, "field_analysis", csv_filename)
if (file.exists(csv_path)) { if (file.exists(csv_path)) {
tryCatch({ tryCatch({
@ -867,7 +873,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
prev_field_analysis <- NULL prev_field_analysis <- NULL
tryCatch({ tryCatch({
analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") analysis_dir <- file.path(reports_dir, "field_analysis")
if (dir.exists(analysis_dir)) { if (dir.exists(analysis_dir)) {
analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
if (length(analysis_files) > 0) { if (length(analysis_files) > 0) {
@ -899,7 +905,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
} }
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
tryCatch({ tryCatch({
@ -920,7 +926,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
} }
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
tryCatch({ tryCatch({

View file

@ -76,12 +76,19 @@
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup # python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
# #
# COMMAND #2 - Batch Download (Multiple Dates): # COMMAND #2 - Batch Download (Multiple Dates):
# For date ranges, MUST use download_planet_missing_dates.py (not Script 00)
# #
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT] # python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
# #
# Example: # Example:
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata # python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
# #
# IMPORTANT DISTINCTION:
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
# Script 00 does NOT have --start/--end flags, despite what the documentation may suggest
# Use the correct script for your use case!
#
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file) # laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
# #
@ -110,15 +117,27 @@
# - One TIFF per field per date (1185 fields × N dates in Angata) # - One TIFF per field per date (1185 fields × N dates in Angata)
# #
# PARAMETERS: # PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba # PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
# #
# COMMAND: # COMMAND #1 - Default (All dates, current date, 7-day window):
# #
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
# #
# Example: # Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
# #
# COMMAND #2 - Specific Date Range:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
#
# Example (one week back from 2026-02-09):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
#
# Example (two weeks back from 2026-02-09):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
#
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files) # Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
# Storage location: laravel_app/storage/app/angata/field_tiles/ # Storage location: laravel_app/storage/app/angata/field_tiles/
@ -157,7 +176,7 @@
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
# #
# Example: # Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
# #
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/ # Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
@ -170,12 +189,6 @@
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates) # Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
# To process all existing merged_tif files: Use large OFFSET (e.g., 365) # To process all existing merged_tif files: Use large OFFSET (e.g., 365)
# #
# TROUBLESHOOTING:
# ❌ If field_tiles_CI has fewer files than field_tiles:
# - Check if all field_tiles/{FIELD}/{DATE}.tif files exist
# - Script 20 may be skipping due to incomplete source files
# - Solution: Delete problematic files from field_tiles and re-run Script 10
#
# ============================================================================ # ============================================================================
@ -208,7 +221,6 @@
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds # File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
# Contains: Interpolated CI data for all fields (wide format) # Contains: Interpolated CI data for all fields (wide format)
# Script execution time: 5-15 minutes
# #
# ============================================================================ # ============================================================================
@ -243,7 +255,6 @@
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: ci_data_for_python.csv (~5-10 MB) # File: ci_data_for_python.csv (~5-10 MB)
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows) # Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
# Script execution time: 1-2 minutes
# #
# ============================================================================ # ============================================================================
@ -283,7 +294,6 @@
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv # File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
# Rows: One per field (e.g., 1185 rows for Angata) # Rows: One per field (e.g., 1185 rows for Angata)
# Script execution time: 2-5 minutes
# #
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete # NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
# #
@ -319,9 +329,6 @@
# Example (one week window): # Example (one week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
# #
# Example (two week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 14 angata
#
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# Location: laravel_app/storage/app/angata/weekly_mosaic/ # Location: laravel_app/storage/app/angata/weekly_mosaic/
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif # Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
@ -360,23 +367,23 @@
# - 21 columns with field-level KPIs and alerts # - 21 columns with field-level KPIs and alerts
# #
# PARAMETERS: # PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba # END_DATE: Report date in YYYY-MM-DD format (default: today)
# WEEK: ISO week number (1-53, optional - default current week) # PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
# YEAR: ISO year (optional - default current year) # OFFSET: Days to look back for historical comparison (default: 7; parameter kept for backward compatibility)
# #
# COMMAND #1 - Current Week (Auto-detects from TODAY): # COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
# #
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
# #
# Example: # Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
# #
# COMMAND #2 - Specific Week & Year: # COMMAND #2 - Specific Date & Project:
# #
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] [WEEK] [YEAR] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
# #
# Example (Week 5, Year 2026): # Example (2026-02-09, angata, 7-day lookback):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026 # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
# #
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx # File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
@ -390,6 +397,11 @@
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence, # tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
# spatial_cluster, alert_urgency, alert_type, alert_message, etc. # spatial_cluster, alert_urgency, alert_type, alert_message, etc.
# #
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
# R40 order: [END_DATE] [OFFSET] [PROJECT]
# R80 order: [END_DATE] [PROJECT] [OFFSET]
# These are NOT the same! Ensure correct order for each script.
#
# ============================================================================ # ============================================================================
@ -469,12 +481,15 @@
# #
# Steps: # Steps:
# 1. SKIP Python download (if you already have data) # 1. SKIP Python download (if you already have data)
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata # 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7
# (Argument order: [PROJECT] [END_DATE] [OFFSET])
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 # 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata # 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata # 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata # 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata # (Argument order: [END_DATE] [OFFSET] [PROJECT])
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7
# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!)
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner: # 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
# OR from R console: # OR from R console:
@ -492,7 +507,9 @@
# #
# Steps: # Steps:
# 1. Python download (your entire date range) # 1. Python download (your entire date range)
# 2. Run R10 once (processes all dates) # 2. Run R10 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering the entire year)
# 3. Run R20 with large offset to process all historical dates: # 3. Run R20 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365 # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering entire year) # (This processes from 2025-02-04 to 2026-02-04, covering entire year)
@ -611,3 +628,4 @@
# laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx # laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx
# #
# ============================================================================== # ==============================================================================