integration lstm 31 and 80 complete - angata kpi file now dynamic
This commit is contained in:
parent
1f5add7485
commit
9f312131d7
|
|
@ -16,16 +16,15 @@
|
|||
# CRITICAL INTEGRATIONS:
|
||||
#
|
||||
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
|
||||
# [ ] Load script 31 output: {project}_imminent_harvest_week{WW}.csv
|
||||
# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
|
||||
# Columns: field, imminent_prob, detected_prob, week, year
|
||||
# [ ] LEFT JOIN to field_analysis_df by (field, week, year)
|
||||
# [ ] Replace hardcoded "placeholder data" in Status_trigger calculation
|
||||
# [ ] Update column to show actual harvest probability (0-1 or 0-100%)
|
||||
# [✓] LEFT JOIN to field_analysis_df by field
|
||||
# [✓] Use actual harvest probability data instead of placeholder
|
||||
#
|
||||
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 31)
|
||||
# [ ] Scripts 22 & 31 populate harvest.xlsx with planting_date per field
|
||||
# [ ] Load harvest.xlsx instead of using UNIFORM_PLANTING_DATE
|
||||
# [ ] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
||||
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
|
||||
# [✓] Load harvest.xlsx with planting_date (season_start)
|
||||
# [✓] Extract planting dates per field
|
||||
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
||||
#
|
||||
# COMMAND-LINE USAGE:
|
||||
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
|
||||
|
|
@ -43,8 +42,9 @@
|
|||
# ============================================================================
|
||||
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
|
||||
# ============================================================================
|
||||
# 1. Load imminent_prob from script 31 (harvest_imminent_weekly.csv)
|
||||
# 2. Load planting_date from harvest.xlsx for field-specific age calculation
|
||||
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
|
||||
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
|
||||
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
|
||||
# ============================================================================
|
||||
|
||||
# ============================================================================
|
||||
|
|
@ -86,8 +86,7 @@ CI_PERCENTILE_HIGH <- 0.90
|
|||
GERMINATION_CI_THRESHOLD <- 2.0
|
||||
|
||||
# PLANTING DATE & AGE CONFIGURATION
|
||||
USE_UNIFORM_AGE <- TRUE
|
||||
UNIFORM_PLANTING_DATE <- as.Date("2026-01-01")
|
||||
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
|
||||
|
||||
# HISTORICAL DATA LOOKBACK
|
||||
WEEKS_FOR_FOUR_WEEK_TREND <- 4
|
||||
|
|
@ -307,6 +306,26 @@ main <- function() {
|
|||
auto_generate = allow_auto_gen,
|
||||
field_boundaries_sf = field_boundaries_sf)
|
||||
|
||||
# Load harvest.xlsx for planting dates (season_start)
|
||||
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
||||
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
|
||||
|
||||
harvesting_data <- tryCatch({
|
||||
if (file.exists(harvest_file_path)) {
|
||||
harvest_raw <- readxl::read_excel(harvest_file_path)
|
||||
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
|
||||
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
|
||||
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
|
||||
harvest_raw
|
||||
} else {
|
||||
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
|
||||
NULL
|
||||
}
|
||||
}, error = function(e) {
|
||||
message(paste(" ERROR loading harvest.xlsx:", e$message))
|
||||
NULL
|
||||
})
|
||||
|
||||
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
||||
|
||||
# Validate planting_dates
|
||||
|
|
@ -374,6 +393,28 @@ main <- function() {
|
|||
|
||||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase"))
|
||||
|
||||
# Load weekly harvest probabilities from script 31 (if available)
|
||||
message("\n4. Loading harvest probabilities from script 31...")
|
||||
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
|
||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
|
||||
message(paste(" Looking for:", harvest_prob_file))
|
||||
|
||||
imminent_prob_data <- tryCatch({
|
||||
if (file.exists(harvest_prob_file)) {
|
||||
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE)
|
||||
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
|
||||
prob_df %>%
|
||||
select(field, imminent_prob, detected_prob) %>%
|
||||
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
|
||||
} else {
|
||||
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
|
||||
NULL
|
||||
}
|
||||
}, error = function(e) {
|
||||
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
|
||||
NULL
|
||||
})
|
||||
|
||||
# ============================================================================
|
||||
# Build final output dataframe with all 21 columns
|
||||
# ============================================================================
|
||||
|
|
@ -427,32 +468,91 @@ main <- function() {
|
|||
},
|
||||
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
|
||||
# Column 7: Four_week_trend (from current_stats)
|
||||
# Column 8: Last_harvest_or_planting_date (dummy for now)
|
||||
Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE,
|
||||
# Columns 9-10: Already in current_stats (Age_week, Phase)
|
||||
# Column 11: nmr_weeks_in_this_phase (already calculated)
|
||||
# Column 12: Germination_progress (already calculated)
|
||||
# Column 13: Imminent_prob (placeholder)
|
||||
Imminent_prob = "placeholder data",
|
||||
# Column 14: Status_trigger (need to add)
|
||||
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
|
||||
Last_harvest_or_planting_date = {
|
||||
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
|
||||
},
|
||||
# Column 9: Age_week (calculated from report date and planting date)
|
||||
Age_week = {
|
||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
||||
planting_dt <- Last_harvest_or_planting_date[idx]
|
||||
if (is.na(planting_dt)) {
|
||||
return(NA_real_)
|
||||
}
|
||||
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1)
|
||||
})
|
||||
},
|
||||
# Column 10: Phase (recalculate based on updated Age_week)
|
||||
Phase = {
|
||||
sapply(Age_week, function(age) {
|
||||
if (is.na(age)) return(NA_character_)
|
||||
if (age >= 0 & age < 4) return("Germination")
|
||||
if (age >= 4 & age < 17) return("Tillering")
|
||||
if (age >= 17 & age < 39) return("Grand Growth")
|
||||
if (age >= 39) return("Maturation")
|
||||
NA_character_
|
||||
})
|
||||
},
|
||||
# Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends)
|
||||
# Column 12: Germination_progress (calculated here from CI values)
|
||||
Germination_progress = {
|
||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
||||
age_w <- Age_week[idx]
|
||||
mean_ci_val <- Mean_CI[idx]
|
||||
|
||||
# Only relevant for germination phase (0-4 weeks)
|
||||
if (is.na(age_w) || age_w < 0 || age_w >= 4) {
|
||||
return(NA_character_)
|
||||
}
|
||||
|
||||
# Estimate % of field with CI >= germination threshold
|
||||
# Based on mean CI, estimate germination percentage
|
||||
if (mean_ci_val >= 0.4) {
|
||||
return(">80%")
|
||||
} else if (mean_ci_val >= 0.25) {
|
||||
return("50-80%")
|
||||
} else if (mean_ci_val >= 0.1) {
|
||||
return("20-50%")
|
||||
} else {
|
||||
return("<20%")
|
||||
}
|
||||
})
|
||||
},
|
||||
# Column 13: Imminent_prob (from script 31 or NA if not available)
|
||||
Imminent_prob = {
|
||||
if (!is.null(imminent_prob_data)) {
|
||||
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
|
||||
} else {
|
||||
rep(NA_real_, nrow(current_stats))
|
||||
}
|
||||
},
|
||||
# Column 14: Status_trigger (based on harvest probability + growth status)
|
||||
Status_trigger = {
|
||||
triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
|
||||
field_id <- current_stats$Field_id[idx]
|
||||
field_idx <- which(field_boundaries_sf$field == field_id)[1]
|
||||
if (is.na(field_idx)) return(NA_character_)
|
||||
imminent_prob <- Imminent_prob[idx]
|
||||
age_w <- Age_week[idx]
|
||||
ci_change <- Weekly_ci_change[idx]
|
||||
phase <- Phase[idx]
|
||||
|
||||
# Reconstruct CI values from Mean_CI for status trigger logic
|
||||
# For now, use simplified approach
|
||||
age_w <- current_stats$Age_week[idx]
|
||||
ci_change <- current_stats$Weekly_ci_change[idx]
|
||||
# Priority 1: Harvest imminent (high probability)
|
||||
if (!is.na(imminent_prob) && imminent_prob > 0.5) {
|
||||
return("harvest_imminent")
|
||||
}
|
||||
|
||||
# Using mean CI as proxy (could be improved with pixel distribution)
|
||||
ci_vals <- rep(current_stats$Mean_CI[idx], 100)
|
||||
get_status_trigger(ci_vals, ci_change, age_w)
|
||||
# Priority 2: Age-based triggers
|
||||
if (!is.na(age_w)) {
|
||||
if (age_w >= 45) return("harvest_ready")
|
||||
if (age_w >= 39) return("maturation_progressing")
|
||||
if (age_w >= 4 & age_w < 39) return("growth_on_track")
|
||||
if (age_w < 4) return("germination_started")
|
||||
}
|
||||
|
||||
# Fallback
|
||||
NA_character_
|
||||
})
|
||||
triggers
|
||||
},
|
||||
# Columns 15-16: Already in current_stats (CI_range, CI_Percentiles)
|
||||
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
|
||||
# Column 17: Already in current_stats (CV)
|
||||
# Column 18: Already in current_stats (CV_Trend_Short_Term)
|
||||
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
|
||||
|
|
@ -464,11 +564,12 @@ main <- function() {
|
|||
.keep = "all" # Keep all existing columns
|
||||
) %>%
|
||||
select(
|
||||
Field_id, Farm_Section, Field_name, Acreage, Mean_CI, Weekly_ci_change,
|
||||
Four_week_trend, Last_harvest_or_planting_date, Age_week, Phase,
|
||||
nmr_weeks_in_this_phase, Germination_progress, Imminent_prob, Status_trigger,
|
||||
CI_range, CI_Percentiles, CV, CV_Trend_Short_Term, CV_Trend_Long_Term, CV_Trend_Long_Term_Category,
|
||||
Cloud_pct_clear, Cloud_category
|
||||
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change",
|
||||
"Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase",
|
||||
"nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger",
|
||||
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
|
||||
"Cloud_pct_clear", "Cloud_category")),
|
||||
any_of(c("CI_range", "CI_Percentiles"))
|
||||
)
|
||||
|
||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
|
||||
|
|
@ -480,6 +581,7 @@ main <- function() {
|
|||
summary_statistics_df,
|
||||
project_dir,
|
||||
current_week,
|
||||
year,
|
||||
reports_dir
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ generate_field_analysis_summary <- function(field_df) {
|
|||
# EXPORT FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
|
||||
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
|
||||
message("Exporting per-field analysis to Excel, CSV, and RDS...")
|
||||
|
||||
field_df_rounded <- field_df %>%
|
||||
|
|
@ -120,7 +120,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
|||
dir.create(output_subdir, recursive = TRUE)
|
||||
}
|
||||
|
||||
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx")
|
||||
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
|
||||
excel_path <- file.path(output_subdir, excel_filename)
|
||||
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
||||
|
||||
|
|
@ -137,18 +137,19 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
|||
field_analysis_summary = summary_df_rounded,
|
||||
metadata = list(
|
||||
current_week = current_week,
|
||||
year = year,
|
||||
project = project_dir,
|
||||
created_at = Sys.time()
|
||||
)
|
||||
)
|
||||
|
||||
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
|
||||
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
|
||||
rds_path <- file.path(reports_dir, "kpis", rds_filename)
|
||||
|
||||
saveRDS(kpi_data, rds_path)
|
||||
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
||||
|
||||
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
|
||||
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
|
||||
csv_path <- file.path(output_subdir, csv_filename)
|
||||
write_csv(field_df_rounded, csv_path)
|
||||
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
||||
|
|
|
|||
|
|
@ -335,21 +335,18 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) {
|
|||
}
|
||||
|
||||
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
|
||||
if (USE_UNIFORM_AGE) {
|
||||
message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
|
||||
# Extract planting dates from harvest.xlsx (season_start column)
|
||||
# Returns: data.frame with columns (field_id, planting_date)
|
||||
|
||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||
message("Warning: No harvesting data available - planting dates will be NA.")
|
||||
if (!is.null(field_boundaries_sf)) {
|
||||
return(data.frame(
|
||||
field_id = field_boundaries_sf$field,
|
||||
date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)),
|
||||
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
} else {
|
||||
return(NULL)
|
||||
}
|
||||
}
|
||||
|
||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||
message("Warning: No harvesting data available.")
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
|
|
@ -362,7 +359,7 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
|
|||
filter(!is.na(planting_date)) %>%
|
||||
as.data.frame()
|
||||
|
||||
message(paste("Extracted planting dates for", nrow(planting_dates), "fields"))
|
||||
message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx"))
|
||||
return(planting_dates)
|
||||
}, error = function(e) {
|
||||
message(paste("Error extracting planting dates:", e$message))
|
||||
|
|
@ -431,17 +428,8 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
|
|||
else if (pct_clear >= 99.5) "Clear view"
|
||||
else "Partial coverage"
|
||||
|
||||
age_weeks <- NA_real_
|
||||
if (USE_UNIFORM_AGE) {
|
||||
age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
|
||||
}
|
||||
phase <- get_phase_by_age(age_weeks)
|
||||
|
||||
germination_progress <- NA_character_
|
||||
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) {
|
||||
pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100
|
||||
germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold)
|
||||
}
|
||||
# Age_week and Phase are now calculated in main script using actual planting dates
|
||||
# Germination_progress is calculated in main script after Age_week is known
|
||||
|
||||
existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
|
||||
|
||||
|
|
@ -457,9 +445,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
|
|||
CI_Percentiles = ci_percentiles_str,
|
||||
Cloud_pct_clear = pct_clear,
|
||||
Cloud_category = cloud_cat,
|
||||
Age_week = round(age_weeks, 1),
|
||||
Phase = phase,
|
||||
Germination_progress = germination_progress,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
|
|
@ -536,9 +521,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
|
|||
|
||||
for (lookback in 1:4) {
|
||||
target_week <- current_week - lookback
|
||||
if (target_week < 1) target_week <- target_week + 52
|
||||
target_year <- year
|
||||
if (target_week < 1) {
|
||||
target_week <- target_week + 52
|
||||
target_year <- target_year - 1
|
||||
}
|
||||
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week)
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
|
||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
||||
|
||||
if (file.exists(rds_path)) {
|
||||
|
|
@ -556,9 +545,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
|
|||
|
||||
for (lookback in 1:8) {
|
||||
target_week <- current_week - lookback
|
||||
if (target_week < 1) target_week <- target_week + 52
|
||||
target_year <- year
|
||||
if (target_week < 1) {
|
||||
target_week <- target_week + 52
|
||||
target_year <- target_year - 1
|
||||
}
|
||||
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week)
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
|
||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
||||
|
||||
if (file.exists(rds_path)) {
|
||||
|
|
@ -693,7 +686,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
|
|||
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
|
||||
mosaic_dir, reports_dir, report_date = Sys.Date()) {
|
||||
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num)
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
|
||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
||||
|
||||
if (file.exists(rds_path)) {
|
||||
|
|
@ -713,7 +706,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
|
|||
saveRDS(stats_df, rds_path)
|
||||
message(paste("Saved weekly statistics RDS:", basename(rds_path)))
|
||||
|
||||
csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num)
|
||||
csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year)
|
||||
csv_path <- file.path(output_dir, csv_filename)
|
||||
readr::write_csv(stats_df, csv_path)
|
||||
message(paste("Saved weekly statistics CSV:", basename(csv_path)))
|
||||
|
|
|
|||
Loading…
Reference in a new issue