From 8d84c8cab550dbd59f5a4aec87935a6d9ae9afa5 Mon Sep 17 00:00:00 2001
From: Timon <timon@resiliencebv.com>
Date: Fri, 16 Jan 2026 08:30:47 +0100
Subject: [PATCH] phase 2 cv trend implemented

---
 r_app/80_calculate_kpis.R | 202 ++++++++++++++++++++++++++++++++------
 1 file changed, 172 insertions(+), 30 deletions(-)

diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R
index 3786f0e..9d02fa4 100644
--- a/r_app/80_calculate_kpis.R
+++ b/r_app/80_calculate_kpis.R
@@ -98,6 +98,10 @@ TEST_MODE_NUM_WEEKS <- 2
 # Percentage of pixels that must reach this CI value to count as "germinated"
 GERMINATION_CI_THRESHOLD <- 2.0  # Pixels with CI >= 2 count as germinated
 
+# FOR TESTING: Set these fields as "recently planted" to demonstrate germination progress
+YOUNG_FIELDS_FOR_TESTING <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")  # First 10 field IDs
+YOUNG_FIELD_PLANTING_DATE <- as.Date("2026-01-01")  # Recently planted for demo
+
 # FOUR-WEEK TREND THRESHOLDS
 FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
 FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
@@ -543,6 +547,10 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
   
   message(paste("Calculating statistics for all fields - Week", week_num, year))
   
+  # Debug: Check if constants are available
+  message(paste("  DEBUG: YOUNG_FIELDS_FOR_TESTING =", paste(YOUNG_FIELDS_FOR_TESTING, collapse=", ")))
+  message(paste("  DEBUG: YOUNG_FIELD_PLANTING_DATE =", YOUNG_FIELD_PLANTING_DATE))
+  
   # Build tile file list
   tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
   tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
@@ -609,10 +617,19 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
                      else "Partial coverage"
         
         # Age and Phase
-        age_weeks <- if (USE_UNIFORM_AGE) {
-          as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
-        } else {
-          NA_real_
+        age_weeks <- NA_real_
+        if (USE_UNIFORM_AGE) {
+          # Check if this field is in the "young fields" list (for testing germination progress)
+          is_young_field <- field_id %in% YOUNG_FIELDS_FOR_TESTING
+          if (is_young_field) {
+            age_weeks <- as.numeric(difftime(report_date, YOUNG_FIELD_PLANTING_DATE, units = "weeks"))
+            # Debug for first 2 matches
+            if (field_id %in% c("1", "2")) {
+              message(paste("  DEBUG: Field", field_id, "is young field, age =", round(age_weeks, 2), "weeks"))
+            }
+          } else {
+            age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
+          }
         }
         phase <- get_phase_by_age(age_weeks)
         
@@ -686,49 +703,138 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL) {
     return(current_stats)
   }
   
+  message(paste("  prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns"))
+  message(paste("  prev_stats columns:", paste(names(prev_stats), collapse = ", ")))
+  
   # Build lookup indices for previous week (by Field_id)
   prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)
   
+  # Try to load previous week's field_analysis to get nmr_weeks_in_this_phase history
+  prev_field_analysis <- NULL
+  prev_analysis_csv <- file.path(
+    reports_dir, "kpis", "field_analysis",
+    sprintf("%s_field_analysis_week%02d.csv", 
+            paste(strsplit(current_stats$Field_id[1], "")[[1]][1], collapse=""),  # Get project from field
+            as.numeric(format(Sys.Date() - 7, "%V")))  # Approximate previous week
+  )
+  
+  # Better way: construct the previous week number properly
+  current_week_num <- as.numeric(format(Sys.Date(), "%V"))
+  prev_week_num <- current_week_num - 1
+  if (prev_week_num < 1) prev_week_num <- 52
+  
+  # This is a bit tricky - we need the project_dir from the main scope
+  # For now, assume we can infer it or pass it through
+  # Let's use a simpler approach: look for any field_analysis_week* file that's recent
+  
+  tryCatch({
+    analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
+    if (dir.exists(analysis_dir)) {
+      # Find the most recent field_analysis CSV (should be previous week)
+      analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
+      if (length(analysis_files) > 0) {
+        # Sort by modification time and get the most recent
+        recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
+        prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, 
+                                               col_select = c(Field_id, nmr_weeks_in_this_phase, Phase))
+      }
+    }
+  }, error = function(e) {
+    message(paste("  Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message))
+  })
+  
+  if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
+    message(paste("  Using previous field_analysis to track nmr_weeks_in_this_phase"))
+  }
+  
   # For each field in current week, lookup previous values
+  cv_trends_calculated <- 0
   for (i in seq_len(nrow(current_stats))) {
     field_id <- current_stats$Field_id[i]
     prev_idx <- prev_lookup[field_id]
     
     if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
-      # Field exists in previous week
-      prev_row <- prev_stats[prev_idx, ]
+      # Field exists in previous week - extract row carefully
+      prev_row <- prev_stats[prev_idx, , drop = FALSE]  # Keep as dataframe
+      
+      if (nrow(prev_row) == 0) {
+        # Field not found - skip
+        next
+      }
+      
+      # Access values from single-row dataframe
+      prev_mean_ci <- prev_row$Mean_CI[1]
+      prev_cv <- prev_row$CV[1]
+      prev_phase <- prev_row$Phase[1]
       
       # Weekly CI change (current Mean_CI - previous Mean_CI)
-      if (!is.na(prev_row$Mean_CI) && !is.na(current_stats$Mean_CI[i])) {
+      if (!is.na(prev_mean_ci) && !is.na(current_stats$Mean_CI[i])) {
         current_stats$Weekly_ci_change[i] <- 
-          round(current_stats$Mean_CI[i] - prev_row$Mean_CI, 2)
+          round(current_stats$Mean_CI[i] - prev_mean_ci, 2)
       }
       
       # CV short-term trend (current CV - previous CV)
-      if (!is.na(prev_row$CV) && !is.na(current_stats$CV[i])) {
-        current_stats$CV_Trend_Short_Term[i] <- 
-          round(current_stats$CV[i] - prev_row$CV, 4)
+      # DEBUG: Check first few fields
+      if (i <= 3) {
+        message(paste("    Field", field_id, "- CV_prev:", prev_cv, "CV_curr:", current_stats$CV[i]))
+      }
+      
+      if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) {
+        trend_val <- round(current_stats$CV[i] - prev_cv, 4)
+        current_stats$CV_Trend_Short_Term[i] <- trend_val
+        cv_trends_calculated <- cv_trends_calculated + 1
+        
+        if (i <= 3) {
+          message(paste("      -> CV_Trend =", trend_val))
+        }
       }
       
       # Weeks in current phase (track phase transitions)
-      if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase)) {
-        if (current_stats$Phase[i] == prev_row$Phase) {
-          # Same phase - increment counter
-          prev_weeks <- if (!is.na(prev_row$nmr_weeks_in_this_phase)) {
-            prev_row$nmr_weeks_in_this_phase
-          } else {
-            1
+      # Use previous field_analysis if available for proper counter progression
+      if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
+        # Look up this field in previous analysis
+        prev_analysis_row <- prev_field_analysis %>% 
+          dplyr::filter(Field_id == field_id)
+        
+        if (nrow(prev_analysis_row) > 0) {
+          prev_phase_analysis <- prev_analysis_row$Phase[1]
+          prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1]
+          
+          if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) {
+            if (current_stats$Phase[i] == prev_phase_analysis) {
+              # Same phase - increment the counter
+              current_stats$nmr_weeks_in_this_phase[i] <- 
+                if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L
+            } else {
+              # Phase changed - reset to 1
+              current_stats$nmr_weeks_in_this_phase[i] <- 1L
+            }
+          }
+        } else if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase)) {
+          # Field not in previous analysis, fall back to prev_stats phase comparison
+          if (current_stats$Phase[i] == prev_phase) {
+            current_stats$nmr_weeks_in_this_phase[i] <- 2L
+          } else {
+            current_stats$nmr_weeks_in_this_phase[i] <- 1L
+          }
+        }
+      } else {
+        # No previous field_analysis available - use phase from prev_stats
+        if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase)) {
+          if (current_stats$Phase[i] == prev_phase) {
+            # Same phase - increment counter (start with 2)
+            current_stats$nmr_weeks_in_this_phase[i] <- 2L
+          } else {
+            # Phase changed - reset to 1
+            current_stats$nmr_weeks_in_this_phase[i] <- 1L
           }
-          current_stats$nmr_weeks_in_this_phase[i] <- prev_weeks + 1L
-        } else {
-          # Phase changed - reset to 1
-          current_stats$nmr_weeks_in_this_phase[i] <- 1L
         }
       }
     }
   }
   
-  message("  Calculated trends for all fields")
+  message(paste("  ✓ Calculated", cv_trends_calculated, "CV_Trend_Short_Term values out of", nrow(current_stats), "fields"))
+  message(paste("    CV_Trend_Short_Term non-NA values:", sum(!is.na(current_stats$CV_Trend_Short_Term))))
   return(current_stats)
 }
 
@@ -1576,6 +1682,10 @@ main <- function() {
   
   # Load/calculate PREVIOUS week stats (from RDS cache or tiles)
   message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
+  
+  # Calculate report date for previous week (7 days before current)
+  prev_report_date <- end_date - 7
+  
   prev_stats <- load_or_calculate_weekly_stats(
     week_num = previous_week,
     year = year,
@@ -1583,10 +1693,12 @@ main <- function() {
     field_boundaries_sf = field_boundaries_sf,
     mosaic_dir = tile_grid$mosaic_dir,
     reports_dir = reports_dir,
-    report_date = end_date - 7  # Approximately 1 week before
+    report_date = prev_report_date
   )
   
   message(paste("  ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
+  message(paste("    Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
+  message(paste("    CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
   
   # Apply trend calculations (requires both weeks)
   message("\n3. Calculating trend columns...")
@@ -1600,6 +1712,40 @@ main <- function() {
   
   message("\nBuilding final field analysis output...")
   
+  # Pre-calculate acreages with geometry validation
+  # This avoids geometry errors during field_analysis construction
+  acreage_lookup <- tryCatch({
+    lookup_df <- field_boundaries_sf %>%
+      sf::st_drop_geometry() %>%
+      as.data.frame() %>%
+      mutate(
+        geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
+          tryCatch({
+            sf::st_is_valid(field_boundaries_sf[idx, ])
+          }, error = function(e) FALSE)
+        }),
+        area_ha = 0
+      )
+    
+    # Calculate area for valid geometries
+    for (idx in which(lookup_df$geometry_valid)) {
+      tryCatch({
+        area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
+        lookup_df$area_ha[idx] <- area_m2 / 10000
+      }, error = function(e) {
+        lookup_df$area_ha[idx] <<- NA_real_
+      })
+    }
+    
+    # Convert hectares to acres
+    lookup_df %>%
+      mutate(acreage = area_ha / 0.404686) %>%
+      select(field, acreage)
+  }, error = function(e) {
+    message(paste("Warning: Could not calculate acreages from geometries -", e$message))
+    data.frame(field = character(0), acreage = numeric(0))
+  })
+  
   field_analysis_df <- current_stats %>%
     mutate(
       # Column 2: Farm_Section (user fills)
@@ -1608,12 +1754,8 @@ main <- function() {
       Field_name = Field_id,
       # Column 4: Acreage (calculate from geometry)
       Acreage = {
-        acreages <- sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
-          field_sf <- field_boundaries_sf[idx, ]
-          field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
-          field_area_ha / 0.404686
-        })
-        acreages[match(Field_id, field_boundaries_sf$field)]
+        acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
+        if_else(is.na(acreages_vec), 0, acreages_vec)
       },
       # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
       # Column 7: Four_week_trend (Phase 3 future)