From 8d84c8cab550dbd59f5a4aec87935a6d9ae9afa5 Mon Sep 17 00:00:00 2001 From: Timon Date: Fri, 16 Jan 2026 08:30:47 +0100 Subject: [PATCH] phase 2 cv trend implemented --- r_app/80_calculate_kpis.R | 202 ++++++++++++++++++++++++++++++++------ 1 file changed, 172 insertions(+), 30 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 3786f0e..9d02fa4 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -98,6 +98,10 @@ TEST_MODE_NUM_WEEKS <- 2 # Percentage of pixels that must reach this CI value to count as "germinated" GERMINATION_CI_THRESHOLD <- 2.0 # Pixels with CI >= 2 count as germinated +# FOR TESTING: Set these fields as "recently planted" to demonstrate germination progress +YOUNG_FIELDS_FOR_TESTING <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") # First 10 field IDs +YOUNG_FIELD_PLANTING_DATE <- as.Date("2026-01-01") # Recently planted for demo + # FOUR-WEEK TREND THRESHOLDS FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 @@ -543,6 +547,10 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, message(paste("Calculating statistics for all fields - Week", week_num, year)) + # Debug: Check if constants are available + message(paste(" DEBUG: YOUNG_FIELDS_FOR_TESTING =", paste(YOUNG_FIELDS_FOR_TESTING, collapse=", "))) + message(paste(" DEBUG: YOUNG_FIELD_PLANTING_DATE =", YOUNG_FIELD_PLANTING_DATE)) + # Build tile file list tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) @@ -609,10 +617,19 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, else "Partial coverage" # Age and Phase - age_weeks <- if (USE_UNIFORM_AGE) { - as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) - } else { - NA_real_ + age_weeks <- NA_real_ + if (USE_UNIFORM_AGE) { + # Check if this field is in the "young fields" list (for testing germination progress) + is_young_field <- field_id %in% YOUNG_FIELDS_FOR_TESTING + if (is_young_field) { + age_weeks <- as.numeric(difftime(report_date, YOUNG_FIELD_PLANTING_DATE, units = "weeks")) + # Debug for first 2 matches + if (field_id %in% c("1", "2")) { + message(paste(" DEBUG: Field", field_id, "is young field, age =", round(age_weeks, 2), "weeks")) + } + } else { + age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) + } } phase <- get_phase_by_age(age_weeks) @@ -686,49 +703,138 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL) { return(current_stats) } + message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) + message(paste(" prev_stats columns:", paste(names(prev_stats), collapse = ", "))) + # Build lookup indices for previous week (by Field_id) prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) + # Try to load previous week's field_analysis to get nmr_weeks_in_this_phase history + prev_field_analysis <- NULL + prev_analysis_csv <- file.path( + reports_dir, "kpis", "field_analysis", + sprintf("%s_field_analysis_week%02d.csv", + paste(strsplit(current_stats$Field_id[1], "")[[1]][1], collapse=""), # Get project from field + as.numeric(format(Sys.Date() - 7, "%V"))) # Approximate previous week + ) + + # Better way: construct the previous week number properly + current_week_num <- as.numeric(format(Sys.Date(), "%V")) + prev_week_num <- current_week_num - 1 + if (prev_week_num < 1) prev_week_num <- 52 + + # This is a bit tricky - we need the project_dir from the main scope + # For now, assume we can infer it or pass it through + # Let's use a simpler approach: look for any field_analysis_week* file that's recent + + tryCatch({ + analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") + if (dir.exists(analysis_dir)) { + # Find the most recent field_analysis CSV (should be previous week) + analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) + if (length(analysis_files) > 0) { + # Sort by modification time and get the most recent + recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] + prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, + col_select = c(Field_id, nmr_weeks_in_this_phase, Phase)) + } + } + }, error = function(e) { + message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) + }) + + if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { + message(paste(" Using previous field_analysis to track nmr_weeks_in_this_phase")) + } + # For each field in current week, lookup previous values + cv_trends_calculated <- 0 for (i in seq_len(nrow(current_stats))) { field_id <- current_stats$Field_id[i] prev_idx <- prev_lookup[field_id] if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { - # Field exists in previous week - prev_row <- prev_stats[prev_idx, ] + # Field exists in previous week - extract row carefully + prev_row <- prev_stats[prev_idx, , drop = FALSE] # Keep as dataframe + + if (nrow(prev_row) == 0) { + # Field not found - skip + next + } + + # Access values from single-row dataframe + prev_mean_ci <- prev_row$Mean_CI[1] + prev_cv <- prev_row$CV[1] + prev_phase <- prev_row$Phase[1] # Weekly CI change (current Mean_CI - previous Mean_CI) - if (!is.na(prev_row$Mean_CI) && !is.na(current_stats$Mean_CI[i])) { + if (!is.na(prev_mean_ci) && !is.na(current_stats$Mean_CI[i])) { current_stats$Weekly_ci_change[i] <- - round(current_stats$Mean_CI[i] - prev_row$Mean_CI, 2) + round(current_stats$Mean_CI[i] - prev_mean_ci, 2) } # CV short-term trend (current CV - previous CV) - if (!is.na(prev_row$CV) && !is.na(current_stats$CV[i])) { - current_stats$CV_Trend_Short_Term[i] <- - round(current_stats$CV[i] - prev_row$CV, 4) + # DEBUG: Check first few fields + if (i <= 3) { + message(paste(" Field", field_id, "- CV_prev:", prev_cv, "CV_curr:", current_stats$CV[i])) + } + + if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { + trend_val <- round(current_stats$CV[i] - prev_cv, 4) + current_stats$CV_Trend_Short_Term[i] <- trend_val + cv_trends_calculated <- cv_trends_calculated + 1 + + if (i <= 3) { + message(paste(" -> CV_Trend =", trend_val)) + } } # Weeks in current phase (track phase transitions) - if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase)) { - if (current_stats$Phase[i] == prev_row$Phase) { - # Same phase - increment counter - prev_weeks <- if (!is.na(prev_row$nmr_weeks_in_this_phase)) { - prev_row$nmr_weeks_in_this_phase - } else { - 1 + # Use previous field_analysis if available for proper counter progression + if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { + # Look up this field in previous analysis + prev_analysis_row <- prev_field_analysis %>% + dplyr::filter(Field_id == field_id) + + if (nrow(prev_analysis_row) > 0) { + prev_phase_analysis <- prev_analysis_row$Phase[1] + prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1] + + if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) { + if (current_stats$Phase[i] == prev_phase_analysis) { + # Same phase - increment the counter + current_stats$nmr_weeks_in_this_phase[i] <- + if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L + } else { + # Phase changed - reset to 1 + current_stats$nmr_weeks_in_this_phase[i] <- 1L + } + } + } else if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase)) { + # Field not in previous analysis, fall back to prev_stats phase comparison + if (current_stats$Phase[i] == prev_phase) { + current_stats$nmr_weeks_in_this_phase[i] <- 2L + } else { + current_stats$nmr_weeks_in_this_phase[i] <- 1L + } + } + } else { + # No previous field_analysis available - use phase from prev_stats + if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase)) { + if (current_stats$Phase[i] == prev_phase) { + # Same phase - increment counter (start with 2) + current_stats$nmr_weeks_in_this_phase[i] <- 2L + } else { + # Phase changed - reset to 1 + current_stats$nmr_weeks_in_this_phase[i] <- 1L } - current_stats$nmr_weeks_in_this_phase[i] <- prev_weeks + 1L - } else { - # Phase changed - reset to 1 - current_stats$nmr_weeks_in_this_phase[i] <- 1L } } } } - message(" Calculated trends for all fields") + message(paste(" ✓ Calculated", cv_trends_calculated, "CV_Trend_Short_Term values out of", nrow(current_stats), "fields")) + message(paste(" CV_Trend_Short_Term non-NA values:", sum(!is.na(current_stats$CV_Trend_Short_Term)))) return(current_stats) } @@ -1576,6 +1682,10 @@ main <- function() { # Load/calculate PREVIOUS week stats (from RDS cache or tiles) message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") + + # Calculate report date for previous week (7 days before current) + prev_report_date <- end_date - 7 + prev_stats <- load_or_calculate_weekly_stats( week_num = previous_week, year = year, @@ -1583,10 +1693,12 @@ main <- function() { field_boundaries_sf = field_boundaries_sf, mosaic_dir = tile_grid$mosaic_dir, reports_dir = reports_dir, - report_date = end_date - 7 # Approximately 1 week before + report_date = prev_report_date ) message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) + message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", "))) + message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV)))) # Apply trend calculations (requires both weeks) message("\n3. Calculating trend columns...") @@ -1600,6 +1712,40 @@ main <- function() { message("\nBuilding final field analysis output...") + # Pre-calculate acreages with geometry validation + # This avoids geometry errors during field_analysis construction + acreage_lookup <- tryCatch({ + lookup_df <- field_boundaries_sf %>% + sf::st_drop_geometry() %>% + as.data.frame() %>% + mutate( + geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { + tryCatch({ + sf::st_is_valid(field_boundaries_sf[idx, ]) + }, error = function(e) FALSE) + }), + area_ha = 0 + ) + + # Calculate area for valid geometries + for (idx in which(lookup_df$geometry_valid)) { + tryCatch({ + area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) + lookup_df$area_ha[idx] <- area_m2 / 10000 + }, error = function(e) { + lookup_df$area_ha[idx] <<- NA_real_ + }) + } + + # Convert hectares to acres + lookup_df %>% + mutate(acreage = area_ha / 0.404686) %>% + select(field, acreage) + }, error = function(e) { + message(paste("Warning: Could not calculate acreages from geometries -", e$message)) + data.frame(field = character(0), acreage = numeric(0)) + }) + field_analysis_df <- current_stats %>% mutate( # Column 2: Farm_Section (user fills) @@ -1608,12 +1754,8 @@ main <- function() { Field_name = Field_id, # Column 4: Acreage (calculate from geometry) Acreage = { - acreages <- sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { - field_sf <- field_boundaries_sf[idx, ] - field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 - field_area_ha / 0.404686 - }) - acreages[match(Field_id, field_boundaries_sf$field)] + acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] + if_else(is.na(acreages_vec), 0, acreages_vec) }, # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change) # Column 7: Four_week_trend (Phase 3 future)