diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 9c5b2b4..410ed40 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -16,16 +16,15 @@ # CRITICAL INTEGRATIONS: # # 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307) -# [ ] Load script 31 output: {project}_imminent_harvest_week{WW}.csv +# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv # Columns: field, imminent_prob, detected_prob, week, year -# [ ] LEFT JOIN to field_analysis_df by (field, week, year) -# [ ] Replace hardcoded "placeholder data" in Status_trigger calculation -# [ ] Update column to show actual harvest probability (0-1 or 0-100%) +# [✓] LEFT JOIN to field_analysis_df by field +# [✓] Use actual harvest probability data instead of placeholder # -# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 31) -# [ ] Scripts 22 & 31 populate harvest.xlsx with planting_date per field -# [ ] Load harvest.xlsx instead of using UNIFORM_PLANTING_DATE -# [ ] Calculate Age_week = difftime(report_date, planting_date, units="weeks") +# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23) +# [✓] Load harvest.xlsx with planting_date (season_start) +# [✓] Extract planting dates per field +# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks") # # COMMAND-LINE USAGE: # Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata @@ -43,8 +42,9 @@ # ============================================================================ # NEXT INTEGRATIONS (See Linear issues for detailed requirements) # ============================================================================ -# 1. Load imminent_prob from script 31 (harvest_imminent_weekly.csv) -# 2. Load planting_date from harvest.xlsx for field-specific age calculation +# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv) +# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation +# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values # ============================================================================ # ============================================================================ @@ -86,8 +86,7 @@ CI_PERCENTILE_HIGH <- 0.90 GERMINATION_CI_THRESHOLD <- 2.0 # PLANTING DATE & AGE CONFIGURATION -USE_UNIFORM_AGE <- TRUE -UNIFORM_PLANTING_DATE <- as.Date("2026-01-01") +# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates # HISTORICAL DATA LOOKBACK WEEKS_FOR_FOUR_WEEK_TREND <- 4 @@ -307,6 +306,26 @@ main <- function() { auto_generate = allow_auto_gen, field_boundaries_sf = field_boundaries_sf) + # Load harvest.xlsx for planting dates (season_start) + message("\nLoading harvest data from harvest.xlsx for planting dates...") + harvest_file_path <- file.path(data_dir, "harvest.xlsx") + + harvesting_data <- tryCatch({ + if (file.exists(harvest_file_path)) { + harvest_raw <- readxl::read_excel(harvest_file_path) + harvest_raw$season_start <- as.Date(harvest_raw$season_start) + harvest_raw$season_end <- as.Date(harvest_raw$season_end) + message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) + harvest_raw + } else { + message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) + NULL + } + }, error = function(e) { + message(paste(" ERROR loading harvest.xlsx:", e$message)) + NULL + }) + planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) # Validate planting_dates @@ -374,6 +393,28 @@ main <- function() { message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase")) + # Load weekly harvest probabilities from script 31 (if available) + message("\n4. Loading harvest probabilities from script 31...") + harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats", + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year)) + message(paste(" Looking for:", harvest_prob_file)) + + imminent_prob_data <- tryCatch({ + if (file.exists(harvest_prob_file)) { + prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE) + message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) + prob_df %>% + select(field, imminent_prob, detected_prob) %>% + rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) + } else { + message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) + NULL + } + }, error = function(e) { + message(paste(" WARNING: Could not load harvest probabilities:", e$message)) + NULL + }) + # ============================================================================ # Build final output dataframe with all 21 columns # ============================================================================ @@ -427,32 +468,91 @@ main <- function() { }, # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change) # Column 7: Four_week_trend (from current_stats) - # Column 8: Last_harvest_or_planting_date (dummy for now) - Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE, - # Columns 9-10: Already in current_stats (Age_week, Phase) - # Column 11: nmr_weeks_in_this_phase (already calculated) - # Column 12: Germination_progress (already calculated) - # Column 13: Imminent_prob (placeholder) - Imminent_prob = "placeholder data", - # Column 14: Status_trigger (need to add) + # Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start) + Last_harvest_or_planting_date = { + planting_dates$planting_date[match(Field_id, planting_dates$field_id)] + }, + # Column 9: Age_week (calculated from report date and planting date) + Age_week = { + sapply(seq_len(nrow(current_stats)), function(idx) { + planting_dt <- Last_harvest_or_planting_date[idx] + if (is.na(planting_dt)) { + return(NA_real_) + } + round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1) + }) + }, + # Column 10: Phase (recalculate based on updated Age_week) + Phase = { + sapply(Age_week, function(age) { + if (is.na(age)) return(NA_character_) + if (age >= 0 & age < 4) return("Germination") + if (age >= 4 & age < 17) return("Tillering") + if (age >= 17 & age < 39) return("Grand Growth") + if (age >= 39) return("Maturation") + NA_character_ + }) + }, + # Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends) + # Column 12: Germination_progress (calculated here from CI values) + Germination_progress = { + sapply(seq_len(nrow(current_stats)), function(idx) { + age_w <- Age_week[idx] + mean_ci_val <- Mean_CI[idx] + + # Only relevant for germination phase (0-4 weeks) + if (is.na(age_w) || age_w < 0 || age_w >= 4) { + return(NA_character_) + } + + # Estimate % of field with CI >= germination threshold + # Based on mean CI, estimate germination percentage + if (mean_ci_val >= 0.4) { + return(">80%") + } else if (mean_ci_val >= 0.25) { + return("50-80%") + } else if (mean_ci_val >= 0.1) { + return("20-50%") + } else { + return("<20%") + } + }) + }, + # Column 13: Imminent_prob (from script 31 or NA if not available) + Imminent_prob = { + if (!is.null(imminent_prob_data)) { + imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + }, + # Column 14: Status_trigger (based on harvest probability + growth status) Status_trigger = { triggers <- sapply(seq_len(nrow(current_stats)), function(idx) { - field_id <- current_stats$Field_id[idx] - field_idx <- which(field_boundaries_sf$field == field_id)[1] - if (is.na(field_idx)) return(NA_character_) + imminent_prob <- Imminent_prob[idx] + age_w <- Age_week[idx] + ci_change <- Weekly_ci_change[idx] + phase <- Phase[idx] - # Reconstruct CI values from Mean_CI for status trigger logic - # For now, use simplified approach - age_w <- current_stats$Age_week[idx] - ci_change <- current_stats$Weekly_ci_change[idx] + # Priority 1: Harvest imminent (high probability) + if (!is.na(imminent_prob) && imminent_prob > 0.5) { + return("harvest_imminent") + } - # Using mean CI as proxy (could be improved with pixel distribution) - ci_vals <- rep(current_stats$Mean_CI[idx], 100) - get_status_trigger(ci_vals, ci_change, age_w) + # Priority 2: Age-based triggers + if (!is.na(age_w)) { + if (age_w >= 45) return("harvest_ready") + if (age_w >= 39) return("maturation_progressing") + if (age_w >= 4 & age_w < 39) return("growth_on_track") + if (age_w < 4) return("germination_started") + } + + # Fallback + NA_character_ }) triggers }, - # Columns 15-16: Already in current_stats (CI_range, CI_Percentiles) + # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles) # Column 17: Already in current_stats (CV) # Column 18: Already in current_stats (CV_Trend_Short_Term) # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value) @@ -464,11 +564,12 @@ main <- function() { .keep = "all" # Keep all existing columns ) %>% select( - Field_id, Farm_Section, Field_name, Acreage, Mean_CI, Weekly_ci_change, - Four_week_trend, Last_harvest_or_planting_date, Age_week, Phase, - nmr_weeks_in_this_phase, Germination_progress, Imminent_prob, Status_trigger, - CI_range, CI_Percentiles, CV, CV_Trend_Short_Term, CV_Trend_Long_Term, CV_Trend_Long_Term_Category, - Cloud_pct_clear, Cloud_category + all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change", + "Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase", + "nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger", + "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", + "Cloud_pct_clear", "Cloud_category")), + any_of(c("CI_range", "CI_Percentiles")) ) message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns")) @@ -480,6 +581,7 @@ main <- function() { summary_statistics_df, project_dir, current_week, + year, reports_dir ) diff --git a/r_app/80_report_building_utils.R b/r_app/80_report_building_utils.R index 35f20a7..7b7f4e9 100644 --- a/r_app/80_report_building_utils.R +++ b/r_app/80_report_building_utils.R @@ -106,7 +106,7 @@ generate_field_analysis_summary <- function(field_df) { # EXPORT FUNCTIONS # ============================================================================ -export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { +export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) { message("Exporting per-field analysis to Excel, CSV, and RDS...") field_df_rounded <- field_df %>% @@ -120,7 +120,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre dir.create(output_subdir, recursive = TRUE) } - excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx") + excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") excel_path <- file.path(output_subdir, excel_filename) excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) @@ -137,18 +137,19 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre field_analysis_summary = summary_df_rounded, metadata = list( current_week = current_week, + year = year, project = project_dir, created_at = Sys.time() ) ) - rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") + rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") rds_path <- file.path(reports_dir, "kpis", rds_filename) saveRDS(kpi_data, rds_path) message(paste("✓ Field analysis RDS exported to:", rds_path)) - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") csv_path <- file.path(output_subdir, csv_filename) write_csv(field_df_rounded, csv_path) message(paste("✓ Field analysis CSV exported to:", csv_path)) diff --git a/r_app/80_weekly_stats_utils.R b/r_app/80_weekly_stats_utils.R index 3a5f1d2..fb5dc8b 100644 --- a/r_app/80_weekly_stats_utils.R +++ b/r_app/80_weekly_stats_utils.R @@ -335,21 +335,18 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) { } extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { - if (USE_UNIFORM_AGE) { - message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) + # Extract planting dates from harvest.xlsx (season_start column) + # Returns: data.frame with columns (field_id, planting_date) + + if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + message("Warning: No harvesting data available - planting dates will be NA.") if (!is.null(field_boundaries_sf)) { return(data.frame( field_id = field_boundaries_sf$field, - date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)), + planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), stringsAsFactors = FALSE )) - } else { - return(NULL) } - } - - if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { - message("Warning: No harvesting data available.") return(NULL) } @@ -362,7 +359,7 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) filter(!is.na(planting_date)) %>% as.data.frame() - message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) + message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx")) return(planting_dates) }, error = function(e) { message(paste("Error extracting planting dates:", e$message)) @@ -431,17 +428,8 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, else if (pct_clear >= 99.5) "Clear view" else "Partial coverage" - age_weeks <- NA_real_ - if (USE_UNIFORM_AGE) { - age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) - } - phase <- get_phase_by_age(age_weeks) - - germination_progress <- NA_character_ - if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) { - pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100 - germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold) - } + # Age_week and Phase are now calculated in main script using actual planting dates + # Germination_progress is calculated in main script after Age_week is known existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) @@ -457,9 +445,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, CI_Percentiles = ci_percentiles_str, Cloud_pct_clear = pct_clear, Cloud_category = cloud_cat, - Age_week = round(age_weeks, 1), - Phase = phase, - Germination_progress = germination_progress, stringsAsFactors = FALSE ) @@ -536,9 +521,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, for (lookback in 1:4) { target_week <- current_week - lookback - if (target_week < 1) target_week <- target_week + 52 + target_year <- year + if (target_week < 1) { + target_week <- target_week + 52 + target_year <- target_year - 1 + } - rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) + rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) if (file.exists(rds_path)) { @@ -556,9 +545,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, for (lookback in 1:8) { target_week <- current_week - lookback - if (target_week < 1) target_week <- target_week + 52 + target_year <- year + if (target_week < 1) { + target_week <- target_week + 52 + target_year <- target_year - 1 + } - rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) + rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) if (file.exists(rds_path)) { @@ -693,7 +686,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, mosaic_dir, reports_dir, report_date = Sys.Date()) { - rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num) + rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) if (file.exists(rds_path)) { @@ -713,7 +706,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo saveRDS(stats_df, rds_path) message(paste("Saved weekly statistics RDS:", basename(rds_path))) - csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num) + csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year) csv_path <- file.path(output_dir, csv_filename) readr::write_csv(stats_df, csv_path) message(paste("Saved weekly statistics CSV:", basename(csv_path)))