integration lstm 31 and 80 complete - angata kpi file now dynamic

This commit is contained in:
Timon 2026-01-18 10:14:08 +01:00
parent 1f5add7485
commit 9f312131d7
3 changed files with 166 additions and 70 deletions

View file

@ -16,16 +16,15 @@
# CRITICAL INTEGRATIONS:
#
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
# [ ] Load script 31 output: {project}_imminent_harvest_week{WW}.csv
# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
# Columns: field, imminent_prob, detected_prob, week, year
# [ ] LEFT JOIN to field_analysis_df by (field, week, year)
# [ ] Replace hardcoded "placeholder data" in Status_trigger calculation
# [ ] Update column to show actual harvest probability (0-1 or 0-100%)
# [✓] LEFT JOIN to field_analysis_df by field
# [✓] Use actual harvest probability data instead of placeholder
#
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 31)
# [ ] Scripts 22 & 31 populate harvest.xlsx with planting_date per field
# [ ] Load harvest.xlsx instead of using UNIFORM_PLANTING_DATE
# [ ] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
# [✓] Load harvest.xlsx with planting_date (season_start)
# [✓] Extract planting dates per field
# [] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
#
# COMMAND-LINE USAGE:
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
@ -43,8 +42,9 @@
# ============================================================================
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
# ============================================================================
# 1. Load imminent_prob from script 31 (harvest_imminent_weekly.csv)
# 2. Load planting_date from harvest.xlsx for field-specific age calculation
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
# ============================================================================
# ============================================================================
@ -86,8 +86,7 @@ CI_PERCENTILE_HIGH <- 0.90
GERMINATION_CI_THRESHOLD <- 2.0
# PLANTING DATE & AGE CONFIGURATION
USE_UNIFORM_AGE <- TRUE
UNIFORM_PLANTING_DATE <- as.Date("2026-01-01")
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
# HISTORICAL DATA LOOKBACK
WEEKS_FOR_FOUR_WEEK_TREND <- 4
@ -307,6 +306,26 @@ main <- function() {
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf)
# Load harvest.xlsx for planting dates (season_start)
message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
# Validate planting_dates
@ -374,6 +393,28 @@ main <- function() {
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase"))
# Load weekly harvest probabilities from script 31 (if available)
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
if (file.exists(harvest_prob_file)) {
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE)
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
prob_df %>%
select(field, imminent_prob, detected_prob) %>%
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
} else {
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
NULL
}
}, error = function(e) {
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
NULL
})
# ============================================================================
# Build final output dataframe with all 21 columns
# ============================================================================
@ -427,32 +468,91 @@ main <- function() {
},
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
# Column 7: Four_week_trend (from current_stats)
# Column 8: Last_harvest_or_planting_date (dummy for now)
Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE,
# Columns 9-10: Already in current_stats (Age_week, Phase)
# Column 11: nmr_weeks_in_this_phase (already calculated)
# Column 12: Germination_progress (already calculated)
# Column 13: Imminent_prob (placeholder)
Imminent_prob = "placeholder data",
# Column 14: Status_trigger (need to add)
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
Last_harvest_or_planting_date = {
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
},
# Column 9: Age_week (calculated from report date and planting date)
Age_week = {
sapply(seq_len(nrow(current_stats)), function(idx) {
planting_dt <- Last_harvest_or_planting_date[idx]
if (is.na(planting_dt)) {
return(NA_real_)
}
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1)
})
},
# Column 10: Phase (recalculate based on updated Age_week)
Phase = {
sapply(Age_week, function(age) {
if (is.na(age)) return(NA_character_)
if (age >= 0 & age < 4) return("Germination")
if (age >= 4 & age < 17) return("Tillering")
if (age >= 17 & age < 39) return("Grand Growth")
if (age >= 39) return("Maturation")
NA_character_
})
},
# Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends)
# Column 12: Germination_progress (calculated here from CI values)
Germination_progress = {
sapply(seq_len(nrow(current_stats)), function(idx) {
age_w <- Age_week[idx]
mean_ci_val <- Mean_CI[idx]
# Only relevant for germination phase (0-4 weeks)
if (is.na(age_w) || age_w < 0 || age_w >= 4) {
return(NA_character_)
}
# Estimate % of field with CI >= germination threshold
# Based on mean CI, estimate germination percentage
if (mean_ci_val >= 0.4) {
return(">80%")
} else if (mean_ci_val >= 0.25) {
return("50-80%")
} else if (mean_ci_val >= 0.1) {
return("20-50%")
} else {
return("<20%")
}
})
},
# Column 13: Imminent_prob (from script 31 or NA if not available)
Imminent_prob = {
if (!is.null(imminent_prob_data)) {
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
},
# Column 14: Status_trigger (based on harvest probability + growth status)
Status_trigger = {
triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
field_id <- current_stats$Field_id[idx]
field_idx <- which(field_boundaries_sf$field == field_id)[1]
if (is.na(field_idx)) return(NA_character_)
imminent_prob <- Imminent_prob[idx]
age_w <- Age_week[idx]
ci_change <- Weekly_ci_change[idx]
phase <- Phase[idx]
# Reconstruct CI values from Mean_CI for status trigger logic
# For now, use simplified approach
age_w <- current_stats$Age_week[idx]
ci_change <- current_stats$Weekly_ci_change[idx]
# Priority 1: Harvest imminent (high probability)
if (!is.na(imminent_prob) && imminent_prob > 0.5) {
return("harvest_imminent")
}
# Using mean CI as proxy (could be improved with pixel distribution)
ci_vals <- rep(current_stats$Mean_CI[idx], 100)
get_status_trigger(ci_vals, ci_change, age_w)
# Priority 2: Age-based triggers
if (!is.na(age_w)) {
if (age_w >= 45) return("harvest_ready")
if (age_w >= 39) return("maturation_progressing")
if (age_w >= 4 & age_w < 39) return("growth_on_track")
if (age_w < 4) return("germination_started")
}
# Fallback
NA_character_
})
triggers
},
# Columns 15-16: Already in current_stats (CI_range, CI_Percentiles)
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
# Column 17: Already in current_stats (CV)
# Column 18: Already in current_stats (CV_Trend_Short_Term)
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
@ -464,11 +564,12 @@ main <- function() {
.keep = "all" # Keep all existing columns
) %>%
select(
Field_id, Farm_Section, Field_name, Acreage, Mean_CI, Weekly_ci_change,
Four_week_trend, Last_harvest_or_planting_date, Age_week, Phase,
nmr_weeks_in_this_phase, Germination_progress, Imminent_prob, Status_trigger,
CI_range, CI_Percentiles, CV, CV_Trend_Short_Term, CV_Trend_Long_Term, CV_Trend_Long_Term_Category,
Cloud_pct_clear, Cloud_category
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change",
"Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase",
"nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger",
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
"Cloud_pct_clear", "Cloud_category")),
any_of(c("CI_range", "CI_Percentiles"))
)
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
@ -480,6 +581,7 @@ main <- function() {
summary_statistics_df,
project_dir,
current_week,
year,
reports_dir
)

View file

@ -106,7 +106,7 @@ generate_field_analysis_summary <- function(field_df) {
# EXPORT FUNCTIONS
# ============================================================================
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
message("Exporting per-field analysis to Excel, CSV, and RDS...")
field_df_rounded <- field_df %>%
@ -120,7 +120,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
dir.create(output_subdir, recursive = TRUE)
}
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx")
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
@ -137,18 +137,19 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
field_analysis_summary = summary_df_rounded,
metadata = list(
current_week = current_week,
year = year,
project = project_dir,
created_at = Sys.time()
)
)
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename)
saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
csv_path <- file.path(output_subdir, csv_filename)
write_csv(field_df_rounded, csv_path)
message(paste("✓ Field analysis CSV exported to:", csv_path))

View file

@ -335,21 +335,18 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) {
}
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
if (USE_UNIFORM_AGE) {
message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
# Extract planting dates from harvest.xlsx (season_start column)
# Returns: data.frame with columns (field_id, planting_date)
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
message("Warning: No harvesting data available - planting dates will be NA.")
if (!is.null(field_boundaries_sf)) {
return(data.frame(
field_id = field_boundaries_sf$field,
date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)),
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE
))
} else {
return(NULL)
}
}
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
message("Warning: No harvesting data available.")
return(NULL)
}
@ -362,7 +359,7 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
filter(!is.na(planting_date)) %>%
as.data.frame()
message(paste("Extracted planting dates for", nrow(planting_dates), "fields"))
message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx"))
return(planting_dates)
}, error = function(e) {
message(paste("Error extracting planting dates:", e$message))
@ -431,17 +428,8 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
else if (pct_clear >= 99.5) "Clear view"
else "Partial coverage"
age_weeks <- NA_real_
if (USE_UNIFORM_AGE) {
age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
}
phase <- get_phase_by_age(age_weeks)
germination_progress <- NA_character_
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) {
pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100
germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold)
}
# Age_week and Phase are now calculated in main script using actual planting dates
# Germination_progress is calculated in main script after Age_week is known
existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
@ -457,9 +445,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
CI_Percentiles = ci_percentiles_str,
Cloud_pct_clear = pct_clear,
Cloud_category = cloud_cat,
Age_week = round(age_weeks, 1),
Phase = phase,
Germination_progress = germination_progress,
stringsAsFactors = FALSE
)
@ -536,9 +521,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
for (lookback in 1:4) {
target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52
target_year <- year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week)
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
if (file.exists(rds_path)) {
@ -556,9 +545,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
for (lookback in 1:8) {
target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52
target_year <- year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week)
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
if (file.exists(rds_path)) {
@ -693,7 +686,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
mosaic_dir, reports_dir, report_date = Sys.Date()) {
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num)
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
if (file.exists(rds_path)) {
@ -713,7 +706,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
saveRDS(stats_df, rds_path)
message(paste("Saved weekly statistics RDS:", basename(rds_path)))
csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num)
csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year)
csv_path <- file.path(output_dir, csv_filename)
readr::write_csv(stats_df, csv_path)
message(paste("Saved weekly statistics CSV:", basename(csv_path)))