phase 2 cv trend implemented

This commit is contained in:
Timon 2026-01-16 08:30:47 +01:00
parent 6e88acef25
commit 8d84c8cab5

View file

@ -98,6 +98,10 @@ TEST_MODE_NUM_WEEKS <- 2
# Percentage of pixels that must reach this CI value to count as "germinated"
GERMINATION_CI_THRESHOLD <- 2.0 # Pixels with CI >= 2 count as germinated
# FOR TESTING: Set these fields as "recently planted" to demonstrate germination progress
YOUNG_FIELDS_FOR_TESTING <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") # First 10 field IDs
YOUNG_FIELD_PLANTING_DATE <- as.Date("2026-01-01") # Recently planted for demo
# FOUR-WEEK TREND THRESHOLDS
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
@ -543,6 +547,10 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
message(paste("Calculating statistics for all fields - Week", week_num, year))
# Debug: Check if constants are available
message(paste(" DEBUG: YOUNG_FIELDS_FOR_TESTING =", paste(YOUNG_FIELDS_FOR_TESTING, collapse=", ")))
message(paste(" DEBUG: YOUNG_FIELD_PLANTING_DATE =", YOUNG_FIELD_PLANTING_DATE))
# Build tile file list
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
@ -609,10 +617,19 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
else "Partial coverage"
# Age and Phase
age_weeks <- if (USE_UNIFORM_AGE) {
as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
} else {
NA_real_
age_weeks <- NA_real_
if (USE_UNIFORM_AGE) {
# Check if this field is in the "young fields" list (for testing germination progress)
is_young_field <- field_id %in% YOUNG_FIELDS_FOR_TESTING
if (is_young_field) {
age_weeks <- as.numeric(difftime(report_date, YOUNG_FIELD_PLANTING_DATE, units = "weeks"))
# Debug for first 2 matches
if (field_id %in% c("1", "2")) {
message(paste(" DEBUG: Field", field_id, "is young field, age =", round(age_weeks, 2), "weeks"))
}
} else {
age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
}
}
phase <- get_phase_by_age(age_weeks)
@ -686,49 +703,138 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL) {
return(current_stats)
}
message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns"))
message(paste(" prev_stats columns:", paste(names(prev_stats), collapse = ", ")))
# Build lookup indices for previous week (by Field_id)
prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)
# Try to load previous week's field_analysis to get nmr_weeks_in_this_phase history
prev_field_analysis <- NULL
prev_analysis_csv <- file.path(
reports_dir, "kpis", "field_analysis",
sprintf("%s_field_analysis_week%02d.csv",
paste(strsplit(current_stats$Field_id[1], "")[[1]][1], collapse=""), # Get project from field
as.numeric(format(Sys.Date() - 7, "%V"))) # Approximate previous week
)
# Better way: construct the previous week number properly
current_week_num <- as.numeric(format(Sys.Date(), "%V"))
prev_week_num <- current_week_num - 1
if (prev_week_num < 1) prev_week_num <- 52
# This is a bit tricky - we need the project_dir from the main scope
# For now, assume we can infer it or pass it through
# Let's use a simpler approach: look for any field_analysis_week* file that's recent
tryCatch({
analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
if (dir.exists(analysis_dir)) {
# Find the most recent field_analysis CSV (should be previous week)
analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
if (length(analysis_files) > 0) {
# Sort by modification time and get the most recent
recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE,
col_select = c(Field_id, nmr_weeks_in_this_phase, Phase))
}
}
}, error = function(e) {
message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message))
})
if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
message(paste(" Using previous field_analysis to track nmr_weeks_in_this_phase"))
}
# For each field in current week, lookup previous values
cv_trends_calculated <- 0
for (i in seq_len(nrow(current_stats))) {
field_id <- current_stats$Field_id[i]
prev_idx <- prev_lookup[field_id]
if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
# Field exists in previous week
prev_row <- prev_stats[prev_idx, ]
# Field exists in previous week - extract row carefully
prev_row <- prev_stats[prev_idx, , drop = FALSE] # Keep as dataframe
if (nrow(prev_row) == 0) {
# Field not found - skip
next
}
# Access values from single-row dataframe
prev_mean_ci <- prev_row$Mean_CI[1]
prev_cv <- prev_row$CV[1]
prev_phase <- prev_row$Phase[1]
# Weekly CI change (current Mean_CI - previous Mean_CI)
if (!is.na(prev_row$Mean_CI) && !is.na(current_stats$Mean_CI[i])) {
if (!is.na(prev_mean_ci) && !is.na(current_stats$Mean_CI[i])) {
current_stats$Weekly_ci_change[i] <-
round(current_stats$Mean_CI[i] - prev_row$Mean_CI, 2)
round(current_stats$Mean_CI[i] - prev_mean_ci, 2)
}
# CV short-term trend (current CV - previous CV)
if (!is.na(prev_row$CV) && !is.na(current_stats$CV[i])) {
current_stats$CV_Trend_Short_Term[i] <-
round(current_stats$CV[i] - prev_row$CV, 4)
# DEBUG: Check first few fields
if (i <= 3) {
message(paste(" Field", field_id, "- CV_prev:", prev_cv, "CV_curr:", current_stats$CV[i]))
}
if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) {
trend_val <- round(current_stats$CV[i] - prev_cv, 4)
current_stats$CV_Trend_Short_Term[i] <- trend_val
cv_trends_calculated <- cv_trends_calculated + 1
if (i <= 3) {
message(paste(" -> CV_Trend =", trend_val))
}
}
# Weeks in current phase (track phase transitions)
if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase)) {
if (current_stats$Phase[i] == prev_row$Phase) {
# Same phase - increment counter
prev_weeks <- if (!is.na(prev_row$nmr_weeks_in_this_phase)) {
prev_row$nmr_weeks_in_this_phase
} else {
1
# Use previous field_analysis if available for proper counter progression
if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
# Look up this field in previous analysis
prev_analysis_row <- prev_field_analysis %>%
dplyr::filter(Field_id == field_id)
if (nrow(prev_analysis_row) > 0) {
prev_phase_analysis <- prev_analysis_row$Phase[1]
prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1]
if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) {
if (current_stats$Phase[i] == prev_phase_analysis) {
# Same phase - increment the counter
current_stats$nmr_weeks_in_this_phase[i] <-
if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L
} else {
# Phase changed - reset to 1
current_stats$nmr_weeks_in_this_phase[i] <- 1L
}
}
} else if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase)) {
# Field not in previous analysis, fall back to prev_stats phase comparison
if (current_stats$Phase[i] == prev_phase) {
current_stats$nmr_weeks_in_this_phase[i] <- 2L
} else {
current_stats$nmr_weeks_in_this_phase[i] <- 1L
}
}
} else {
# No previous field_analysis available - use phase from prev_stats
if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase)) {
if (current_stats$Phase[i] == prev_phase) {
# Same phase - increment counter (start with 2)
current_stats$nmr_weeks_in_this_phase[i] <- 2L
} else {
# Phase changed - reset to 1
current_stats$nmr_weeks_in_this_phase[i] <- 1L
}
current_stats$nmr_weeks_in_this_phase[i] <- prev_weeks + 1L
} else {
# Phase changed - reset to 1
current_stats$nmr_weeks_in_this_phase[i] <- 1L
}
}
}
}
message(" Calculated trends for all fields")
message(paste(" ✓ Calculated", cv_trends_calculated, "CV_Trend_Short_Term values out of", nrow(current_stats), "fields"))
message(paste(" CV_Trend_Short_Term non-NA values:", sum(!is.na(current_stats$CV_Trend_Short_Term))))
return(current_stats)
}
@ -1576,6 +1682,10 @@ main <- function() {
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
# Calculate report date for previous week (7 days before current)
prev_report_date <- end_date - 7
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = year,
@ -1583,10 +1693,12 @@ main <- function() {
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = tile_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = end_date - 7 # Approximately 1 week before
report_date = prev_report_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
# Apply trend calculations (requires both weeks)
message("\n3. Calculating trend columns...")
@ -1600,6 +1712,40 @@ main <- function() {
message("\nBuilding final field analysis output...")
# Pre-calculate acreages with geometry validation
# This avoids geometry errors during field_analysis construction
acreage_lookup <- tryCatch({
lookup_df <- field_boundaries_sf %>%
sf::st_drop_geometry() %>%
as.data.frame() %>%
mutate(
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
tryCatch({
sf::st_is_valid(field_boundaries_sf[idx, ])
}, error = function(e) FALSE)
}),
area_ha = 0
)
# Calculate area for valid geometries
for (idx in which(lookup_df$geometry_valid)) {
tryCatch({
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
lookup_df$area_ha[idx] <- area_m2 / 10000
}, error = function(e) {
lookup_df$area_ha[idx] <<- NA_real_
})
}
# Convert hectares to acres
lookup_df %>%
mutate(acreage = area_ha / 0.404686) %>%
select(field, acreage)
}, error = function(e) {
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
data.frame(field = character(0), acreage = numeric(0))
})
field_analysis_df <- current_stats %>%
mutate(
# Column 2: Farm_Section (user fills)
@ -1608,12 +1754,8 @@ main <- function() {
Field_name = Field_id,
# Column 4: Acreage (calculate from geometry)
Acreage = {
acreages <- sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
field_sf <- field_boundaries_sf[idx, ]
field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
field_area_ha / 0.404686
})
acreages[match(Field_id, field_boundaries_sf$field)]
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
if_else(is.na(acreages_vec), 0, acreages_vec)
},
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
# Column 7: Four_week_trend (Phase 3 future)