diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 410ed40..bef0764 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -66,17 +66,15 @@ CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05 # Negative slope = CV decreasing = field becoming MORE uniform = GOOD # Positive slope = CV increasing = field becoming MORE patchy = BAD # Near zero = Homogenous growth (all crops progressing equally) -CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.05 # CV decreasing rapidly -CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # Gradual synchronization -CV_SLOPE_IMPROVEMENT_MAX <- -0.005 # Becoming uniform -CV_SLOPE_HOMOGENOUS_MIN <- -0.005 # Stable, uniform growth -CV_SLOPE_HOMOGENOUS_MAX <- 0.005 # No change in uniformity -CV_SLOPE_PATCHINESS_MIN <- 0.005 # Minor divergence -CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness -CV_SLOPE_SEVERE_MIN <- 0.02 # Field fragmentation beginning +CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks) +CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks) +CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks) +CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation) +CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks) +CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks) +CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks) +CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>3% increase over 8 weeks) -# CLOUD COVER ROUNDING INTERVALS -CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100) # PERCENTILE CALCULATIONS CI_PERCENTILE_LOW <- 0.10 @@ -391,7 +389,7 @@ main <- function() { current_week = current_week, year = year) - message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase")) + message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) # Load weekly harvest probabilities from script 31 (if available) message("\n4. Loading harvest probabilities from script 31...") @@ -479,7 +477,7 @@ main <- function() { if (is.na(planting_dt)) { return(NA_real_) } - round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1) + round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0) }) }, # Column 10: Phase (recalculate based on updated Age_week) @@ -493,31 +491,23 @@ main <- function() { NA_character_ }) }, - # Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends) + # Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends) # Column 12: Germination_progress (calculated here from CI values) - Germination_progress = { - sapply(seq_len(nrow(current_stats)), function(idx) { - age_w <- Age_week[idx] - mean_ci_val <- Mean_CI[idx] - - # Only relevant for germination phase (0-4 weeks) - if (is.na(age_w) || age_w < 0 || age_w >= 4) { - return(NA_character_) - } - - # Estimate % of field with CI >= germination threshold - # Based on mean CI, estimate germination percentage - if (mean_ci_val >= 0.4) { - return(">80%") - } else if (mean_ci_val >= 0.25) { - return("50-80%") - } else if (mean_ci_val >= 0.1) { - return("20-50%") - } else { - return("<20%") - } - }) - }, + # Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% + Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) { + if (is.na(pct)) return(NA_character_) + if (pct >= 95) return("95-100%") + else if (pct >= 90) return("90-95%") + else if (pct >= 80) return("80-90%") + else if (pct >= 70) return("70-80%") + else if (pct >= 60) return("60-70%") + else if (pct >= 50) return("50-60%") + else if (pct >= 40) return("40-50%") + else if (pct >= 30) return("30-40%") + else if (pct >= 20) return("20-30%") + else if (pct >= 10) return("10-20%") + else return("0-10%") + }), # Column 13: Imminent_prob (from script 31 or NA if not available) Imminent_prob = { if (!is.null(imminent_prob_data)) { @@ -526,59 +516,84 @@ main <- function() { rep(NA_real_, nrow(current_stats)) } }, - # Column 14: Status_trigger (based on harvest probability + growth status) - Status_trigger = { - triggers <- sapply(seq_len(nrow(current_stats)), function(idx) { + # Column 14: Status_Alert (based on harvest probability + crop health status) + # Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA + Status_Alert = { + sapply(seq_len(nrow(current_stats)), function(idx) { imminent_prob <- Imminent_prob[idx] age_w <- Age_week[idx] - ci_change <- Weekly_ci_change[idx] - phase <- Phase[idx] + weekly_ci_chg <- Weekly_ci_change[idx] + mean_ci_val <- Mean_CI[idx] - # Priority 1: Harvest imminent (high probability) - if (!is.na(imminent_prob) && imminent_prob > 0.5) { - return("harvest_imminent") + # Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months) + if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) { + return("Ready for harvest-check") } - # Priority 2: Age-based triggers - if (!is.na(age_w)) { - if (age_w >= 45) return("harvest_ready") - if (age_w >= 39) return("maturation_progressing") - if (age_w >= 4 & age_w < 39) return("growth_on_track") - if (age_w < 4) return("germination_started") + # Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5) + if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) { + return("Strong decline in crop health") } - # Fallback + # Priority 3: Harvested/bare (Mean CI < 1.5) + if (!is.na(mean_ci_val) && mean_ci_val < 1.5) { + return("Harvested/bare") + } + + # Fallback: no alert NA_character_ }) - triggers }, # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles) # Column 17: Already in current_stats (CV) # Column 18: Already in current_stats (CV_Trend_Short_Term) # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value) # Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope) + # 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01) CV_Trend_Long_Term_Category = { - sapply(current_stats$CV_Trend_Long_Term, categorize_cv_slope) + sapply(current_stats$CV_Trend_Long_Term, function(slope) { + if (is.na(slope)) { + return(NA_character_) + } else if (slope < -0.01) { + return("More uniform") + } else if (slope > 0.01) { + return("Less uniform") + } else { + return("Stable uniformity") + } + }) }, # Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category) - .keep = "all" # Keep all existing columns + # Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% + Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) { + if (is.na(pct)) return(NA_character_) + if (pct >= 95) return("95-100%") + else if (pct >= 90) return("90-95%") + else if (pct >= 80) return("80-90%") + else if (pct >= 70) return("70-80%") + else if (pct >= 60) return("60-70%") + else if (pct >= 50) return("50-60%") + else if (pct >= 40) return("40-50%") + else if (pct >= 30) return("30-40%") + else if (pct >= 20) return("20-30%") + else if (pct >= 10) return("10-20%") + else return("0-10%") + }), ) %>% select( - all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change", - "Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase", - "nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger", + all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", + "Last_harvest_or_planting_date", "Age_week", "Phase", + "Germination_progress", + "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", - "Cloud_pct_clear", "Cloud_category")), - any_of(c("CI_range", "CI_Percentiles")) + "Imminent_prob", "Cloud_pct_clear", "Cloud_category")) ) message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns")) - summary_statistics_df <- generate_field_analysis_summary(field_analysis_df) - export_paths <- export_field_analysis_excel( field_analysis_df, - summary_statistics_df, + NULL, project_dir, current_week, year, @@ -586,15 +601,12 @@ main <- function() { ) cat("\n--- Per-field Results (first 10) ---\n") - available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category") + available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") available_cols <- available_cols[available_cols %in% names(field_analysis_df)] if (length(available_cols) > 0) { print(head(field_analysis_df[, available_cols], 10)) } - cat("\n--- Summary Statistics ---\n") - print(summary_statistics_df) - # ========== FARM-LEVEL KPI AGGREGATION ========== # Aggregate the per-field analysis into farm-level summary statistics @@ -623,15 +635,15 @@ main <- function() { farm_summary$phase_distribution <- phase_dist - # 2. STATUS TRIGGER DISTRIBUTION + # 2. STATUS ALERT DISTRIBUTION status_dist <- field_data %>% - group_by(Status_trigger) %>% + group_by(Status_Alert) %>% summarise( num_fields = n(), acreage = sum(Acreage, na.rm = TRUE), .groups = 'drop' ) %>% - rename(Category = Status_trigger) + rename(Category = Status_Alert) farm_summary$status_distribution <- status_dist diff --git a/r_app/80_report_building_utils.R b/r_app/80_report_building_utils.R index 7b7f4e9..0c5db3c 100644 --- a/r_app/80_report_building_utils.R +++ b/r_app/80_report_building_utils.R @@ -112,8 +112,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre field_df_rounded <- field_df %>% mutate(across(where(is.numeric), ~ round(., 2))) - summary_df_rounded <- summary_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) + # Handle NULL summary_df + summary_df_rounded <- if (!is.null(summary_df)) { + summary_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + } else { + NULL + } output_subdir <- file.path(reports_dir, "kpis", "field_analysis") if (!dir.exists(output_subdir)) { @@ -124,10 +129,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre excel_path <- file.path(output_subdir, excel_filename) excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) + # Build sheets list dynamically sheets <- list( - "Field Data" = field_df_rounded, - "Summary" = summary_df_rounded + "Field Data" = field_df_rounded ) + if (!is.null(summary_df_rounded)) { + sheets[["Summary"]] <- summary_df_rounded + } write_xlsx(sheets, excel_path) message(paste("✓ Field analysis Excel exported to:", excel_path)) diff --git a/r_app/80_weekly_stats_utils.R b/r_app/80_weekly_stats_utils.R index fb5dc8b..a4b460e 100644 --- a/r_app/80_weekly_stats_utils.R +++ b/r_app/80_weekly_stats_utils.R @@ -184,12 +184,17 @@ round_cloud_to_intervals <- function(cloud_pct_clear) { return(NA_character_) } - if (cloud_pct_clear < 50) return("<50%") + if (cloud_pct_clear < 10) return("0-10%") + if (cloud_pct_clear < 20) return("10-20%") + if (cloud_pct_clear < 30) return("20-30%") + if (cloud_pct_clear < 40) return("30-40%") + if (cloud_pct_clear < 50) return("40-50%") if (cloud_pct_clear < 60) return("50-60%") if (cloud_pct_clear < 70) return("60-70%") if (cloud_pct_clear < 80) return("70-80%") if (cloud_pct_clear < 90) return("80-90%") - return(">90%") + if (cloud_pct_clear < 95) return("90-95%") + return("95-100%") } get_ci_percentiles <- function(ci_values) { @@ -420,12 +425,18 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, range_str <- sprintf("%.1f-%.1f", range_min, range_max) ci_percentiles_str <- get_ci_percentiles(ci_vals) + # Count pixels with CI >= 2 (germination threshold) + GERMINATION_CI_THRESHOLD <- 2.0 + num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE) + num_pixels_total <- length(ci_vals) + pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 + field_rows <- extracted[extracted$ID == field_poly_idx, ] num_total <- nrow(field_rows) num_data <- sum(!is.na(field_rows$CI)) pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 cloud_cat <- if (num_data == 0) "No image available" - else if (pct_clear >= 99.5) "Clear view" + else if (pct_clear >= 95) "Clear view" else "Partial coverage" # Age_week and Phase are now calculated in main script using actual planting dates @@ -440,9 +451,10 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, results_list[[length(results_list) + 1]] <- data.frame( Field_id = field_id, Mean_CI = round(mean_ci, 2), - CV = round(cv, 4), + CV = round(cv * 100, 2), CI_range = range_str, CI_Percentiles = ci_percentiles_str, + Pct_pixels_CI_gte_2 = pct_pixels_gte_2, Cloud_pct_clear = pct_clear, Cloud_category = cloud_cat, stringsAsFactors = FALSE @@ -482,7 +494,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, current_stats$CV_Trend_Short_Term <- NA_real_ current_stats$Four_week_trend <- NA_real_ current_stats$CV_Trend_Long_Term <- NA_real_ - current_stats$nmr_weeks_in_this_phase <- 1L + current_stats$nmr_of_weeks_analysed <- 1L if (is.null(prev_stats) || nrow(prev_stats) == 0) { message(" No previous week data available - using defaults") @@ -502,7 +514,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, if (length(analysis_files) > 0) { recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, - col_select = c(Field_id, nmr_weeks_in_this_phase, Phase)) + col_select = c(Field_id, nmr_of_weeks_analysed, Phase)) } } }, error = function(e) { @@ -510,7 +522,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, }) if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - message(paste(" Using previous field_analysis to track nmr_weeks_in_this_phase")) + message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed")) } historical_4weeks <- list() @@ -643,30 +655,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, dplyr::filter(Field_id == field_id) if (nrow(prev_analysis_row) > 0) { - prev_phase_analysis <- prev_analysis_row$Phase[1] - prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1] + prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1] - if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) { - if (current_stats$Phase[i] == prev_phase_analysis) { - current_stats$nmr_weeks_in_this_phase[i] <- - if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L - } else { - current_stats$nmr_weeks_in_this_phase[i] <- 1L - } - } - } else if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) { - if (current_stats$Phase[i] == prev_row$Phase[1]) { - current_stats$nmr_weeks_in_this_phase[i] <- 2L + # Only increment nmr_of_weeks_analysed if we have previous data + if (!is.na(prev_nmr_weeks_analysis)) { + current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L } else { - current_stats$nmr_weeks_in_this_phase[i] <- 1L - } - } - } else { - if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) { - if (current_stats$Phase[i] == prev_row$Phase[1]) { - current_stats$nmr_weeks_in_this_phase[i] <- 2L - } else { - current_stats$nmr_weeks_in_this_phase[i] <- 1L + current_stats$nmr_of_weeks_analysed[i] <- 1L } } }