added some more stats and functions calculate and export csv and rds file for future ref

2026-01-16 08:04:45 +01:00 · 2026-01-16 08:04:45 +01:00 · 6e88acef25
parent 4e94a9a78b
commit 6e88acef25
1 changed files with 345 additions and 243 deletions
--- a/r_app/80_calculate_kpis.R
+++ b/r_app/80_calculate_kpis.R
@ -65,11 +65,6 @@
 #   [✓] Add Cloud_pct_clear (% from pixel coverage)
 #   [✓] Column order: Reorder to match spec (1-21)
 #
-# PHASE 1 LIMITATION (Known Issue - To Fix in PHASE 2):
-#   - Fields spanning multiple tiles currently show statistics from first intersecting tile only
-#   - This results in p10 ≈ p90 (few pixels per tile) instead of field-wide percentiles
-#   - FIX: After extracting all tiles, group by field_id and aggregate pixel values across all tiles
-#         before calculating percentiles. This will give true field-wide statistics.
 #
 # PHASE 2 - MEDIUM (Requires computation):
 #   [ ] Add nmr_weeks_in_this_phase (track phase transitions with previous week CSV)
@ -539,6 +534,245 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
  })
 }

+# ============================================================================
+# MODULAR STATISTICS CALCULATION (Reusable for any week)
+# ============================================================================
+
+calculate_field_statistics <- function(field_boundaries_sf, week_num, year, 
+                                        mosaic_dir, report_date = Sys.Date()) {
+  
+  message(paste("Calculating statistics for all fields - Week", week_num, year))
+  
+  # Build tile file list
+  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
+  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
+  
+  if (length(tile_files) == 0) {
+    stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
+  }
+  
+  message(paste("  Found", length(tile_files), "tiles for week", week_num))
+  
+  results_list <- list()
+  fields_processed <- 0
+  
+  # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
+  for (tile_idx in seq_along(tile_files)) {
+    tile_file <- tile_files[tile_idx]
+    
+    tryCatch({
+      # Load tile
+      current_rast <- terra::rast(tile_file)
+      ci_band <- current_rast[["CI"]]
+      
+      if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
+        message(paste("    [SKIP] Tile", basename(tile_file), "- CI band not found"))
+        return(NULL)
+      }
+      
+      # Extract all fields from this tile in ONE call
+      # terra::extract returns a dataframe with columns: ID, CI
+      # where each row is one pixel, and ID indicates which polygon it came from
+      extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE)
+      
+      # Group by field ID and calculate statistics for each field
+      # extracted$ID contains the field polygon index (1 to nrow(field_boundaries_sf))
+      unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)])
+      
+      for (field_poly_idx in unique_field_ids) {
+        # Get all CI values for this field from this tile
+        field_id <- field_boundaries_sf$field[field_poly_idx]
+        ci_vals <- extracted$CI[extracted$ID == field_poly_idx]
+        ci_vals <- ci_vals[!is.na(ci_vals)]
+        
+        # Skip if no data for this field in this tile
+        if (length(ci_vals) == 0) {
+          next
+        }
+        
+        # Calculate statistics
+        mean_ci <- mean(ci_vals, na.rm = TRUE)
+        ci_std <- sd(ci_vals, na.rm = TRUE)
+        cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
+        range_min <- min(ci_vals, na.rm = TRUE)
+        range_max <- max(ci_vals, na.rm = TRUE)
+        range_str <- sprintf("%.1f-%.1f", range_min, range_max)
+        ci_percentiles_str <- get_ci_percentiles(ci_vals)
+        
+        # Cloud coverage: count total pixels vs non-NA pixels for this field
+        field_rows <- extracted[extracted$ID == field_poly_idx, ]
+        num_total <- nrow(field_rows)
+        num_data <- sum(!is.na(field_rows$CI))
+        pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
+        cloud_cat <- if (num_data == 0) "No image available"
+                     else if (pct_clear >= 99.5) "Clear view"
+                     else "Partial coverage"
+        
+        # Age and Phase
+        age_weeks <- if (USE_UNIFORM_AGE) {
+          as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
+        } else {
+          NA_real_
+        }
+        phase <- get_phase_by_age(age_weeks)
+        
+        # Germination progress (only for young plants, age < 17 weeks)
+        germination_progress <- NA_character_
+        if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) {
+          pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100
+          germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold)
+        }
+        
+        # Store result (check if field already exists from another tile)
+        existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
+        
+        if (length(existing_idx) > 0) {
+          # Field already in results from previous tile - keep first occurrence or average
+          # For now, keep the first one (earlier tiles)
+          next
+        }
+        
+        # Store new field result
+        results_list[[length(results_list) + 1]] <- data.frame(
+          Field_id = field_id,
+          Mean_CI = round(mean_ci, 2),
+          CV = round(cv, 4),
+          CI_range = range_str,
+          CI_Percentiles = ci_percentiles_str,
+          Cloud_pct_clear = pct_clear,
+          Cloud_category = cloud_cat,
+          Age_week = round(age_weeks, 1),
+          Phase = phase,
+          Germination_progress = germination_progress,
+          stringsAsFactors = FALSE
+        )
+        
+        fields_processed <- fields_processed + 1
+      }
+      
+      message(paste("    Tile", tile_idx, "of", length(tile_files), "processed"))
+      
+    }, error = function(e) {
+      message(paste("    [ERROR] Tile", basename(tile_file), ":", e$message))
+    })
+  }
+  
+  if (length(results_list) == 0) {
+    stop(paste("No fields processed successfully for week", week_num))
+  }
+  
+  stats_df <- dplyr::bind_rows(results_list)
+  message(paste("  ✓ Successfully calculated statistics for", nrow(stats_df), "fields"))
+  
+  return(stats_df)
+}
+
+# ============================================================================
+# CALCULATE KPI TRENDS (Requires previous week RDS)
+# ============================================================================
+
+calculate_kpi_trends <- function(current_stats, prev_stats = NULL) {
+  
+  message("Calculating KPI trends from current and previous week data")
+  
+  # Initialize new columns with defaults
+  current_stats$Weekly_ci_change <- NA_real_
+  current_stats$CV_Trend_Short_Term <- NA_real_
+  current_stats$nmr_weeks_in_this_phase <- 1L
+  
+  # If no previous week data, return with defaults
+  if (is.null(prev_stats) || nrow(prev_stats) == 0) {
+    message("  No previous week data available - using defaults")
+    return(current_stats)
+  }
+  
+  # Build lookup indices for previous week (by Field_id)
+  prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)
+  
+  # For each field in current week, lookup previous values
+  for (i in seq_len(nrow(current_stats))) {
+    field_id <- current_stats$Field_id[i]
+    prev_idx <- prev_lookup[field_id]
+    
+    if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
+      # Field exists in previous week
+      prev_row <- prev_stats[prev_idx, ]
+      
+      # Weekly CI change (current Mean_CI - previous Mean_CI)
+      if (!is.na(prev_row$Mean_CI) && !is.na(current_stats$Mean_CI[i])) {
+        current_stats$Weekly_ci_change[i] <- 
+          round(current_stats$Mean_CI[i] - prev_row$Mean_CI, 2)
+      }
+      
+      # CV short-term trend (current CV - previous CV)
+      if (!is.na(prev_row$CV) && !is.na(current_stats$CV[i])) {
+        current_stats$CV_Trend_Short_Term[i] <- 
+          round(current_stats$CV[i] - prev_row$CV, 4)
+      }
+      
+      # Weeks in current phase (track phase transitions)
+      if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase)) {
+        if (current_stats$Phase[i] == prev_row$Phase) {
+          # Same phase - increment counter
+          prev_weeks <- if (!is.na(prev_row$nmr_weeks_in_this_phase)) {
+            prev_row$nmr_weeks_in_this_phase
+          } else {
+            1
+          }
+          current_stats$nmr_weeks_in_this_phase[i] <- prev_weeks + 1L
+        } else {
+          # Phase changed - reset to 1
+          current_stats$nmr_weeks_in_this_phase[i] <- 1L
+        }
+      }
+    }
+  }
+  
+  message("  Calculated trends for all fields")
+  return(current_stats)
+}
+
+# ============================================================================
+# LOAD OR CALCULATE WEEKLY STATISTICS (RDS Caching)
+# ============================================================================
+
+load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, 
+                                           mosaic_dir, reports_dir, report_date = Sys.Date()) {
+  
+  # Build RDS file path
+  rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num)
+  rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
+  
+  # Try to load existing RDS (fast cache)
+  if (file.exists(rds_path)) {
+    message(paste("Loading cached statistics from:", basename(rds_path)))
+    return(readRDS(rds_path))
+  }
+  
+  # RDS not found - calculate from tiles
+  message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
+  stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, 
+                                          mosaic_dir, report_date)
+  
+  # Create output directory if needed
+  output_dir <- file.path(reports_dir, "kpis", "field_stats")
+  if (!dir.exists(output_dir)) {
+    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+  }
+  
+  # Export RDS (for fast lookup next week)
+  saveRDS(stats_df, rds_path)
+  message(paste("Saved weekly statistics RDS:", basename(rds_path)))
+  
+  # Export CSV (for user review)
+  csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num)
+  csv_path <- file.path(output_dir, csv_filename)
+  readr::write_csv(stats_df, csv_path)
+  message(paste("Saved weekly statistics CSV:", basename(csv_path)))
+  
+  return(stats_df)
+}
+
 # ============================================================================
 # PARALLEL FIELD ANALYSIS FUNCTION
 # ============================================================================
@ -1316,248 +1550,116 @@ main <- function() {
  }
  
  # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
-  message("\nProcessing tiles and extracting field statistics...")
-  all_tile_results <- list()
+  # ============================================================================
+  # NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
+  # ============================================================================
  
-  for (i in seq_along(tile_files)) {
-    tile_file <- tile_files[i]
-    message(paste("  Processing tile", i, "of", length(tile_files), ":", basename(tile_file)))
+  # Build tile grid (needed by calculate_field_statistics)
+  message("\nBuilding tile grid for current week...")
+  tile_grid <- build_tile_grid(mosaic_dir, current_week, year)
  
-    tryCatch({
-      # Load current tile and previous week tile
-      current_rast <- terra::rast(tile_file)
+  message("\nUsing modular RDS-based approach for weekly statistics...")
  
-      # DEBUG: Check tile structure on first tile
-      if (i == 1) {
-        message(paste("    [DEBUG] Tile CRS:", terra::crs(current_rast)))
-        message(paste("    [DEBUG] Tile extent:", paste(terra::ext(current_rast))))
-        message(paste("    [DEBUG] Field boundaries CRS:", sf::st_crs(field_boundaries_sf)))
-        field_bbox <- sf::st_bbox(field_boundaries_sf)
-        message(paste("    [DEBUG] Field bbox:", paste(round(field_bbox, 2))))
-        message(paste("    [DEBUG] Band names:", paste(names(current_rast), collapse=", ")))
-      }
+  # Load/calculate CURRENT week stats (from tiles or RDS cache)
+  message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
+  current_stats <- load_or_calculate_weekly_stats(
+    week_num = current_week,
+    year = year,
+    project_dir = project_dir,
+    field_boundaries_sf = field_boundaries_sf,
+    mosaic_dir = tile_grid$mosaic_dir,
+    reports_dir = reports_dir,
+    report_date = end_date
+  )
  
-      # Extract CI band by name
-      ci_band <- current_rast[["CI"]]
+  message(paste("  ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
  
-      # Check if CI band exists - use proper logical checks
-      if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
-        message(paste("    ERROR: CI band not found. Available bands:", paste(names(current_rast), collapse=", ")))
-        next
-      }
+  # Load/calculate PREVIOUS week stats (from RDS cache or tiles)
+  message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
+  prev_stats <- load_or_calculate_weekly_stats(
+    week_num = previous_week,
+    year = year,
+    project_dir = project_dir,
+    field_boundaries_sf = field_boundaries_sf,
+    mosaic_dir = tile_grid$mosaic_dir,
+    reports_dir = reports_dir,
+    report_date = end_date - 7  # Approximately 1 week before
+  )
  
-      # Check if CI band has any valid data
-      if (tryCatch(all(is.na(values(ci_band))), error = function(e) TRUE)) {
-        message(paste("    ERROR: CI band has no valid data"))
-        next
-      }
+  message(paste("  ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
  
-      # Load previous week tile if available
-      previous_tile_file <- sub(sprintf("week_%02d", current_week), 
-                               sprintf("week_%02d", previous_week), 
-                               tile_file)
-      previous_ci <- NULL
-      if (file.exists(previous_tile_file)) {
-        previous_rast <- terra::rast(previous_tile_file)
-        previous_ci <- previous_rast[["CI"]]
-      }
+  # Apply trend calculations (requires both weeks)
+  message("\n3. Calculating trend columns...")
+  current_stats <- calculate_kpi_trends(current_stats, prev_stats)
  
-      # OPTION 1 + 2: Extract all CI statistics from one pixel extraction (single call)
-      current_stats <- extract_field_statistics_from_ci(ci_band, field_boundaries_sf)
+  message(paste("  ✓ Added Weekly_ci_change, CV_Trend_Short_Term, nmr_weeks_in_this_phase"))
  
-      # DEBUG: Check extraction result on first tile
-      if (i == 1) {
-        num_with_data <- sum(!is.na(current_stats$mean_ci))
-        message(paste("    [DEBUG] Extracted", nrow(current_stats), "fields, ", num_with_data, "with non-NA data"))
-        if (num_with_data > 0) {
-          message(paste("    [DEBUG] Sample mean CIs:", paste(head(current_stats$mean_ci[!is.na(current_stats$mean_ci)], 3), collapse=", ")))
-          # Check percentiles
-          sample_field <- which(!is.na(current_stats$mean_ci))[2]
-          message(paste("    [DEBUG] Field", sample_field, "- p10:", current_stats$p10[sample_field], 
-                       "p90:", current_stats$p90[sample_field], 
-                       "min:", current_stats$min_ci[sample_field],
-                       "max:", current_stats$max_ci[sample_field],
-                       "valid_pixels:", current_stats$pixel_count_valid[sample_field]))
-        }
-      }
+  # ============================================================================
+  # Build final output dataframe with all 21 columns
+  # ============================================================================
  
-      # Extract previous week CI statistics if available
-      previous_stats <- NULL
-      if (!is.null(previous_ci)) {
-        previous_stats <- extract_field_statistics_from_ci(previous_ci, field_boundaries_sf)
-      }
-      
-      # Process each field that was extracted
-      field_results_this_tile <- list()
-      fields_added <- 0
-      
-      for (field_idx in seq_len(nrow(field_boundaries_sf))) {
-        tryCatch({
-          field_id <- field_boundaries_sf$field[field_idx]
-          field_sf <- field_boundaries_sf[field_idx, ]
-          
-          # Get statistics from helper function results
-          # current_stats should have same number of rows as field_boundaries_sf
-          if (field_idx > nrow(current_stats)) {
-            message(paste("    [ERROR] field_idx", field_idx, "> nrow(current_stats)", nrow(current_stats)))
-            next
-          }
-          
-          mean_ci_current <- current_stats$mean_ci[field_idx]
-          
-          # SKIP fields with no data in this tile (they don't intersect this tile)
-          if (is.na(current_stats$pixel_count_valid[field_idx]) || current_stats$pixel_count_valid[field_idx] == 0) {
-            next
-          }
-          ci_cv_current <- current_stats$cv[field_idx]
-          ci_percentile_low <- current_stats$p10[field_idx]
-          ci_percentile_high <- current_stats$p90[field_idx]
-          
-          # If field doesn't intersect this tile, mean_ci_current will be NA
-          if (is.na(mean_ci_current)) {
-            next  # Skip this field - doesn't intersect this tile
-          }
+  message("\nBuilding final field analysis output...")
  
+  field_analysis_df <- current_stats %>%
+    mutate(
+      # Column 2: Farm_Section (user fills)
+      Farm_Section = NA_character_,
+      # Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
+      Field_name = Field_id,
+      # Column 4: Acreage (calculate from geometry)
+      Acreage = {
+        acreages <- sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
+          field_sf <- field_boundaries_sf[idx, ]
          field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
-          field_area_acres <- field_area_ha / 0.404686
-          
-          # Extract previous week CI if available
-          mean_ci_previous <- NA
-          ci_change <- NA
-          if (!is.null(previous_stats)) {
-            mean_ci_previous <- previous_stats$mean_ci[field_idx]
-            if (!is.na(mean_ci_previous)) {
-              ci_change <- mean_ci_current - mean_ci_previous
-            }
-          }
-          
-          # Reconstruct pixel values for status trigger (we need the actual pixel array)
-          # Use the percentiles and mean to create a synthetic distribution for status_trigger
-          # For now, use mean CI repeated by pixel count for testing
-          # TODO: Consider extracting pixels directly if needed for more complex triggers
-          pixel_count_valid <- current_stats$pixel_count_valid[field_idx]
-          ci_vals_current <- if (!is.na(pixel_count_valid) && pixel_count_valid > 0) {
-            rep(mean_ci_current, pixel_count_valid)  # Simplified: use mean value repeated
-          } else {
-            numeric(0)
-          }
-          
-          # Calculate age
-          age_weeks <- if (!is.null(planting_dates) && nrow(planting_dates) > 0 && field_idx <= nrow(planting_dates)) {
-            planting_date <- planting_dates$date[field_idx]
-            if (!is.na(planting_date)) {
-              as.numeric(difftime(end_date, planting_date, units = "weeks"))
-            } else {
-              0
-            }
-          } else {
-            0
-          }
-          
-          # Get phase and status
-          phase <- get_phase_by_age(age_weeks)
-          status_trigger <- get_status_trigger(ci_vals_current, ci_change, age_weeks)
-          
-          # Calculate Cloud_pct_clear: percentage of field with valid data
-          # Binned to 10% intervals (0, 10, 20, ..., 90, 100)
-          cloud_pct_clear <- {
-            valid_count <- current_stats$pixel_count_valid[field_idx]
-            total_count <- current_stats$pixel_count_total[field_idx]
-            if (!is.na(valid_count) && !is.na(total_count) && total_count > 0) {
-              pct <- (valid_count / total_count) * 100
-              round(pct / 10) * 10
-            } else {
-              NA_real_
-            }
-          }
-          
-          # Cloud categorization based on pixel coverage (Cloud_pct_clear)
-          cloud_category <- if (is.na(cloud_pct_clear)) {
-            "No image available"
-          } else if (cloud_pct_clear >= 90) {
-            "Clear view"
-          } else if (cloud_pct_clear > 0) {
-            "Partial coverage"
-          } else {
-            "No image available"
-          }
-          
-          # Get min/max CI values
-          ci_min <- current_stats$min_ci[field_idx]
-          ci_max <- current_stats$max_ci[field_idx]
-          ci_range <- if (!is.na(ci_min) && !is.na(ci_max)) {
-            sprintf("%.1f-%.1f", ci_min, ci_max)
-          } else {
-            NA_character_
-          }
-          
-          # Get field_name from field_boundaries_sf
-          field_name <- field_sf$field
-          
-          # Build result row (21 columns per specification, in order)
-          result_row <- data.frame(
-            Field_id = field_id,
-            Farm_Section = NA_character_,
-            Field_name = field_name,
-            Acreage = field_area_acres,
-            Mean_CI = mean_ci_current,
-            Weekly_ci_change = ci_change,
-            Four_week_trend = NA_character_,
-            Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE,
-            Age_weeks = age_weeks,
-            Phase = phase,
-            nmr_weeks_in_this_phase = NA_real_,
-            Germination_progress = NA_real_,
-            Imminent_prob = NA_real_,
-            Status_trigger = status_trigger,
-            CI_range = ci_range,
-            CI_Percentiles = if (!is.na(ci_percentile_low) && !is.na(ci_percentile_high)) {
-              sprintf("%.1f-%.1f", ci_percentile_low, ci_percentile_high)
-            } else {
-              NA_character_
-            },
-            CV = ci_cv_current,
-            CV_Trend_Short_Term = NA_real_,
-            CV_Trend_Long_Term = NA_real_,
-            Cloud_pct_clear = cloud_pct_clear,
-            Cloud_category = cloud_category,
-            stringsAsFactors = FALSE
-          )
-          
-          field_results_this_tile[[as.character(field_id)]] <- result_row
-          fields_added <- fields_added + 1
-          
-        }, error = function(e) {
-          # Show error for debugging
-          message(paste("      [FIELD ERROR] Field", field_idx, ":", e$message))
+          field_area_ha / 0.404686
        })
-      }
+        acreages[match(Field_id, field_boundaries_sf$field)]
+      },
+      # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
+      # Column 7: Four_week_trend (Phase 3 future)
+      Four_week_trend = NA_character_,
+      # Column 8: Last_harvest_or_planting_date (dummy for now)
+      Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE,
+      # Columns 9-10: Already in current_stats (Age_week, Phase)
+      # Column 11: nmr_weeks_in_this_phase (already calculated)
+      # Column 12: Germination_progress (already calculated)
+      # Column 13: Imminent_prob (placeholder)
+      Imminent_prob = "placeholder data",
+      # Column 14: Status_trigger (need to add)
+      Status_trigger = {
+        triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
+          field_id <- current_stats$Field_id[idx]
+          field_idx <- which(field_boundaries_sf$field == field_id)[1]
+          if (is.na(field_idx)) return(NA_character_)
          
-      if (length(field_results_this_tile) > 0) {
-        all_tile_results[[basename(tile_file)]] <- dplyr::bind_rows(field_results_this_tile)
-        message(paste("    Extracted", length(field_results_this_tile), "fields from tile (processed", fields_added, "fields total)"))
-      } else {
-        message(paste("    WARNING: No fields extracted from this tile (processed", fields_added, "fields, all either NA or errored)"))
-      }
+          # Reconstruct CI values from Mean_CI for status trigger logic
+          # For now, use simplified approach
+          age_w <- current_stats$Age_week[idx]
+          ci_change <- current_stats$Weekly_ci_change[idx]
          
-    }, error = function(e) {
-      message(paste("  Error processing tile", basename(tile_file), ":", e$message))
-    })
-  }
+          # Using mean CI as proxy (could be improved with pixel distribution)
+          ci_vals <- rep(current_stats$Mean_CI[idx], 100)
+          get_status_trigger(ci_vals, ci_change, age_w)
+        })
+        triggers
+      },
+      # Columns 15-16: Already in current_stats (CI_range, CI_Percentiles)
+      # Column 17: Already in current_stats (CV)
+      # Column 18: Already in current_stats (CV_Trend_Short_Term)
+      # Column 19: CV_Trend_Long_Term (Phase 3 future)
+      CV_Trend_Long_Term = NA_real_,
+      # Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
+      .keep = "all"  # Keep all existing columns
+    ) %>%
+    select(
+      Field_id, Farm_Section, Field_name, Acreage, Mean_CI, Weekly_ci_change,
+      Four_week_trend, Last_harvest_or_planting_date, Age_week, Phase,
+      nmr_weeks_in_this_phase, Germination_progress, Imminent_prob, Status_trigger,
+      CI_range, CI_Percentiles, CV, CV_Trend_Short_Term, CV_Trend_Long_Term,
+      Cloud_pct_clear, Cloud_category
+    )
  
-  # Combine all tile results, keeping unique fields (may appear in multiple tiles)
-  if (length(all_tile_results) == 0) {
-    stop("No fields extracted from any tiles!")
-  }
-  
-  field_analysis_df <- dplyr::bind_rows(all_tile_results) %>%
-    distinct(Field_id, .keep_all = TRUE)
-  
-  if (nrow(field_analysis_df) == 0) {
-    stop("No fields analyzed successfully!")
-  }
-  
-  message(paste("✓ Analyzed", nrow(field_analysis_df), "fields"))
+  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
  
  summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)