integration lstm 31 and 80 complete - angata kpi file now dynamic

This commit is contained in:
Timon 2026-01-18 10:14:08 +01:00
parent 1f5add7485
commit 9f312131d7
3 changed files with 166 additions and 70 deletions

View file

@ -16,16 +16,15 @@
# CRITICAL INTEGRATIONS: # CRITICAL INTEGRATIONS:
# #
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307) # 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
# [ ] Load script 31 output: {project}_imminent_harvest_week{WW}.csv # [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
# Columns: field, imminent_prob, detected_prob, week, year # Columns: field, imminent_prob, detected_prob, week, year
# [ ] LEFT JOIN to field_analysis_df by (field, week, year) # [✓] LEFT JOIN to field_analysis_df by field
# [ ] Replace hardcoded "placeholder data" in Status_trigger calculation # [✓] Use actual harvest probability data instead of placeholder
# [ ] Update column to show actual harvest probability (0-1 or 0-100%)
# #
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 31) # 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
# [ ] Scripts 22 & 31 populate harvest.xlsx with planting_date per field # [✓] Load harvest.xlsx with planting_date (season_start)
# [ ] Load harvest.xlsx instead of using UNIFORM_PLANTING_DATE # [✓] Extract planting dates per field
# [ ] Calculate Age_week = difftime(report_date, planting_date, units="weeks") # [] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
# #
# COMMAND-LINE USAGE: # COMMAND-LINE USAGE:
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata # Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
@ -43,8 +42,9 @@
# ============================================================================ # ============================================================================
# NEXT INTEGRATIONS (See Linear issues for detailed requirements) # NEXT INTEGRATIONS (See Linear issues for detailed requirements)
# ============================================================================ # ============================================================================
# 1. Load imminent_prob from script 31 (harvest_imminent_weekly.csv) # 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
# 2. Load planting_date from harvest.xlsx for field-specific age calculation # 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
# ============================================================================ # ============================================================================
# ============================================================================ # ============================================================================
@ -86,8 +86,7 @@ CI_PERCENTILE_HIGH <- 0.90
GERMINATION_CI_THRESHOLD <- 2.0 GERMINATION_CI_THRESHOLD <- 2.0
# PLANTING DATE & AGE CONFIGURATION # PLANTING DATE & AGE CONFIGURATION
USE_UNIFORM_AGE <- TRUE # Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
UNIFORM_PLANTING_DATE <- as.Date("2026-01-01")
# HISTORICAL DATA LOOKBACK # HISTORICAL DATA LOOKBACK
WEEKS_FOR_FOUR_WEEK_TREND <- 4 WEEKS_FOR_FOUR_WEEK_TREND <- 4
@ -307,6 +306,26 @@ main <- function() {
auto_generate = allow_auto_gen, auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf) field_boundaries_sf = field_boundaries_sf)
# Load harvest.xlsx for planting dates (season_start)
message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
# Validate planting_dates # Validate planting_dates
@ -374,6 +393,28 @@ main <- function() {
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase")) message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase"))
# Load weekly harvest probabilities from script 31 (if available)
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
if (file.exists(harvest_prob_file)) {
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE)
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
prob_df %>%
select(field, imminent_prob, detected_prob) %>%
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
} else {
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
NULL
}
}, error = function(e) {
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
NULL
})
# ============================================================================ # ============================================================================
# Build final output dataframe with all 21 columns # Build final output dataframe with all 21 columns
# ============================================================================ # ============================================================================
@ -427,32 +468,91 @@ main <- function() {
}, },
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change) # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
# Column 7: Four_week_trend (from current_stats) # Column 7: Four_week_trend (from current_stats)
# Column 8: Last_harvest_or_planting_date (dummy for now) # Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE, Last_harvest_or_planting_date = {
# Columns 9-10: Already in current_stats (Age_week, Phase) planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
# Column 11: nmr_weeks_in_this_phase (already calculated) },
# Column 12: Germination_progress (already calculated) # Column 9: Age_week (calculated from report date and planting date)
# Column 13: Imminent_prob (placeholder) Age_week = {
Imminent_prob = "placeholder data", sapply(seq_len(nrow(current_stats)), function(idx) {
# Column 14: Status_trigger (need to add) planting_dt <- Last_harvest_or_planting_date[idx]
if (is.na(planting_dt)) {
return(NA_real_)
}
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1)
})
},
# Column 10: Phase (recalculate based on updated Age_week)
Phase = {
sapply(Age_week, function(age) {
if (is.na(age)) return(NA_character_)
if (age >= 0 & age < 4) return("Germination")
if (age >= 4 & age < 17) return("Tillering")
if (age >= 17 & age < 39) return("Grand Growth")
if (age >= 39) return("Maturation")
NA_character_
})
},
# Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends)
# Column 12: Germination_progress (calculated here from CI values)
Germination_progress = {
sapply(seq_len(nrow(current_stats)), function(idx) {
age_w <- Age_week[idx]
mean_ci_val <- Mean_CI[idx]
# Only relevant for germination phase (0-4 weeks)
if (is.na(age_w) || age_w < 0 || age_w >= 4) {
return(NA_character_)
}
# Estimate % of field with CI >= germination threshold
# Based on mean CI, estimate germination percentage
if (mean_ci_val >= 0.4) {
return(">80%")
} else if (mean_ci_val >= 0.25) {
return("50-80%")
} else if (mean_ci_val >= 0.1) {
return("20-50%")
} else {
return("<20%")
}
})
},
# Column 13: Imminent_prob (from script 31 or NA if not available)
Imminent_prob = {
if (!is.null(imminent_prob_data)) {
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
},
# Column 14: Status_trigger (based on harvest probability + growth status)
Status_trigger = { Status_trigger = {
triggers <- sapply(seq_len(nrow(current_stats)), function(idx) { triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
field_id <- current_stats$Field_id[idx] imminent_prob <- Imminent_prob[idx]
field_idx <- which(field_boundaries_sf$field == field_id)[1] age_w <- Age_week[idx]
if (is.na(field_idx)) return(NA_character_) ci_change <- Weekly_ci_change[idx]
phase <- Phase[idx]
# Reconstruct CI values from Mean_CI for status trigger logic # Priority 1: Harvest imminent (high probability)
# For now, use simplified approach if (!is.na(imminent_prob) && imminent_prob > 0.5) {
age_w <- current_stats$Age_week[idx] return("harvest_imminent")
ci_change <- current_stats$Weekly_ci_change[idx] }
# Using mean CI as proxy (could be improved with pixel distribution) # Priority 2: Age-based triggers
ci_vals <- rep(current_stats$Mean_CI[idx], 100) if (!is.na(age_w)) {
get_status_trigger(ci_vals, ci_change, age_w) if (age_w >= 45) return("harvest_ready")
if (age_w >= 39) return("maturation_progressing")
if (age_w >= 4 & age_w < 39) return("growth_on_track")
if (age_w < 4) return("germination_started")
}
# Fallback
NA_character_
}) })
triggers triggers
}, },
# Columns 15-16: Already in current_stats (CI_range, CI_Percentiles) # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
# Column 17: Already in current_stats (CV) # Column 17: Already in current_stats (CV)
# Column 18: Already in current_stats (CV_Trend_Short_Term) # Column 18: Already in current_stats (CV_Trend_Short_Term)
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value) # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
@ -464,11 +564,12 @@ main <- function() {
.keep = "all" # Keep all existing columns .keep = "all" # Keep all existing columns
) %>% ) %>%
select( select(
Field_id, Farm_Section, Field_name, Acreage, Mean_CI, Weekly_ci_change, all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change",
Four_week_trend, Last_harvest_or_planting_date, Age_week, Phase, "Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase",
nmr_weeks_in_this_phase, Germination_progress, Imminent_prob, Status_trigger, "nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger",
CI_range, CI_Percentiles, CV, CV_Trend_Short_Term, CV_Trend_Long_Term, CV_Trend_Long_Term_Category, "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
Cloud_pct_clear, Cloud_category "Cloud_pct_clear", "Cloud_category")),
any_of(c("CI_range", "CI_Percentiles"))
) )
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns")) message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
@ -480,6 +581,7 @@ main <- function() {
summary_statistics_df, summary_statistics_df,
project_dir, project_dir,
current_week, current_week,
year,
reports_dir reports_dir
) )

View file

@ -106,7 +106,7 @@ generate_field_analysis_summary <- function(field_df) {
# EXPORT FUNCTIONS # EXPORT FUNCTIONS
# ============================================================================ # ============================================================================
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
message("Exporting per-field analysis to Excel, CSV, and RDS...") message("Exporting per-field analysis to Excel, CSV, and RDS...")
field_df_rounded <- field_df %>% field_df_rounded <- field_df %>%
@ -120,7 +120,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
dir.create(output_subdir, recursive = TRUE) dir.create(output_subdir, recursive = TRUE)
} }
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx") excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename) excel_path <- file.path(output_subdir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
@ -137,18 +137,19 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
field_analysis_summary = summary_df_rounded, field_analysis_summary = summary_df_rounded,
metadata = list( metadata = list(
current_week = current_week, current_week = current_week,
year = year,
project = project_dir, project = project_dir,
created_at = Sys.time() created_at = Sys.time()
) )
) )
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename) rds_path <- file.path(reports_dir, "kpis", rds_filename)
saveRDS(kpi_data, rds_path) saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path)) message(paste("✓ Field analysis RDS exported to:", rds_path))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
csv_path <- file.path(output_subdir, csv_filename) csv_path <- file.path(output_subdir, csv_filename)
write_csv(field_df_rounded, csv_path) write_csv(field_df_rounded, csv_path)
message(paste("✓ Field analysis CSV exported to:", csv_path)) message(paste("✓ Field analysis CSV exported to:", csv_path))

View file

@ -335,21 +335,18 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) {
} }
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
if (USE_UNIFORM_AGE) { # Extract planting dates from harvest.xlsx (season_start column)
message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) # Returns: data.frame with columns (field_id, planting_date)
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
message("Warning: No harvesting data available - planting dates will be NA.")
if (!is.null(field_boundaries_sf)) { if (!is.null(field_boundaries_sf)) {
return(data.frame( return(data.frame(
field_id = field_boundaries_sf$field, field_id = field_boundaries_sf$field,
date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)), planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE stringsAsFactors = FALSE
)) ))
} else {
return(NULL)
} }
}
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
message("Warning: No harvesting data available.")
return(NULL) return(NULL)
} }
@ -362,7 +359,7 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
filter(!is.na(planting_date)) %>% filter(!is.na(planting_date)) %>%
as.data.frame() as.data.frame()
message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx"))
return(planting_dates) return(planting_dates)
}, error = function(e) { }, error = function(e) {
message(paste("Error extracting planting dates:", e$message)) message(paste("Error extracting planting dates:", e$message))
@ -431,17 +428,8 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
else if (pct_clear >= 99.5) "Clear view" else if (pct_clear >= 99.5) "Clear view"
else "Partial coverage" else "Partial coverage"
age_weeks <- NA_real_ # Age_week and Phase are now calculated in main script using actual planting dates
if (USE_UNIFORM_AGE) { # Germination_progress is calculated in main script after Age_week is known
age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
}
phase <- get_phase_by_age(age_weeks)
germination_progress <- NA_character_
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) {
pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100
germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold)
}
existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
@ -457,9 +445,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
CI_Percentiles = ci_percentiles_str, CI_Percentiles = ci_percentiles_str,
Cloud_pct_clear = pct_clear, Cloud_pct_clear = pct_clear,
Cloud_category = cloud_cat, Cloud_category = cloud_cat,
Age_week = round(age_weeks, 1),
Phase = phase,
Germination_progress = germination_progress,
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
@ -536,9 +521,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
for (lookback in 1:4) { for (lookback in 1:4) {
target_week <- current_week - lookback target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52 target_year <- year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
@ -556,9 +545,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
for (lookback in 1:8) { for (lookback in 1:8) {
target_week <- current_week - lookback target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52 target_year <- year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
@ -693,7 +686,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
mosaic_dir, reports_dir, report_date = Sys.Date()) { mosaic_dir, reports_dir, report_date = Sys.Date()) {
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
@ -713,7 +706,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
saveRDS(stats_df, rds_path) saveRDS(stats_df, rds_path)
message(paste("Saved weekly statistics RDS:", basename(rds_path))) message(paste("Saved weekly statistics RDS:", basename(rds_path)))
csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num) csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year)
csv_path <- file.path(output_dir, csv_filename) csv_path <- file.path(output_dir, csv_filename)
readr::write_csv(stats_df, csv_path) readr::write_csv(stats_df, csv_path)
message(paste("Saved weekly statistics CSV:", basename(csv_path))) message(paste("Saved weekly statistics CSV:", basename(csv_path)))