diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index c0f5a2b..a825434 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -316,6 +316,9 @@ main <- function() { # Use centralized paths from setup object (no need for file.path calls) weekly_mosaic <- setup$weekly_mosaic_dir daily_vals_dir <- setup$daily_ci_vals_dir + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + tryCatch({ source(here("r_app", "30_growth_model_utils.R")) @@ -389,599 +392,62 @@ main <- function() { message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message(strrep("=", 70)) - # Set reports_dir for CANE_SUPPLY workflow (used by export functions) - reports_dir <- setup$kpi_reports_dir - data_dir <- setup$data_dir - - # Continue with existing per-field analysis code below - - message("\n", strrep("-", 70)) - message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") - message(strrep("-", 70)) - - weeks <- calculate_week_numbers(end_date) - current_week <- weeks$current_week - current_year <- weeks$current_year - previous_week <- weeks$previous_week - previous_year <- weeks$previous_year - - message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year)) - # Find per-field weekly mosaics - message("Finding per-field weekly mosaics...") + # Define variables needed for workflow functions + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + - if (!dir.exists(weekly_mosaic)) { - stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, - "\nScript 40 (mosaic creation) must be run first.")) - } - - field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) - field_dirs <- field_dirs[field_dirs != ""] - - if (length(field_dirs) == 0) { - stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, - "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) - } - - # Verify we have mosaics for this week - single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) - per_field_files <- c() - for (field in field_dirs) { - field_mosaic_dir <- file.path(weekly_mosaic, field) - files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - if (length(files) > 0) { - per_field_files <- c(per_field_files, files) - } - } - - if (length(per_field_files) == 0) { - stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year, - "\nExpected pattern:", single_file_pattern, - "\nChecked:", weekly_mosaic)) - } - - message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) - - mosaic_mode <- "per-field" - mosaic_dir <- weekly_mosaic - - - - # Load field boundaries + # Load field boundaries for workflow (use data_dir from setup) + message("\nLoading field boundaries for KPI calculation...") tryCatch({ - boundaries_result <- load_field_boundaries(data_dir) - + boundaries_result <- load_field_boundaries(setup$data_dir) + if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { field_boundaries_sf <- boundaries_result$field_boundaries_sf } else { field_boundaries_sf <- boundaries_result } - + if (nrow(field_boundaries_sf) == 0) { stop("No fields loaded from boundaries") } - - message(paste(" Loaded", nrow(field_boundaries_sf), "fields")) + + message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields")) }, error = function(e) { stop("ERROR loading field boundaries: ", e$message) }) + + # Load harvesting data + if (!exists("harvesting_data")) { + warning("harvesting_data not loaded. TCH KPI will use placeholder values.") + harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) + } + + # Extract current week/year from end_date + current_week <- as.numeric(format(end_date, "%V")) + current_year <- as.numeric(format(end_date, "%G")) + - message("Loading historical field data for trend calculations...") - # Load up to 8 weeks (max of 4-week and 8-week trend requirements) - # Function gracefully handles missing weeks and loads whatever exists - num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8 - message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data...")) - - # Only auto-generate on first call (not in recursive calls from within load_historical_field_data) - allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) - - historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir, - num_weeks = num_weeks_to_load, - auto_generate = allow_auto_gen, - field_boundaries_sf = field_boundaries_sf, - daily_vals_dir = daily_vals_dir) - - # Load harvest.xlsx for planting dates (season_start) - message("\nLoading harvest data from harvest.xlsx for planting dates...") - harvest_file_path <- file.path(data_dir, "harvest.xlsx") - - harvesting_data <- tryCatch({ - if (file.exists(harvest_file_path)) { - harvest_raw <- readxl::read_excel(harvest_file_path) - harvest_raw$season_start <- as.Date(harvest_raw$season_start) - harvest_raw$season_end <- as.Date(harvest_raw$season_end) - message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) - harvest_raw - } else { - message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) - NULL - } - }, error = function(e) { - message(paste(" ERROR loading harvest.xlsx:", e$message)) - NULL - }) - - planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) - - # Validate planting_dates - if (is.null(planting_dates) || nrow(planting_dates) == 0) { - message("WARNING: No planting dates available. Using NA for all fields.") - planting_dates <- data.frame( - field_id = field_boundaries_sf$field, - planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), - stringsAsFactors = FALSE - ) - } - # Build per-field configuration - message("\nPreparing mosaic configuration for statistics calculation...") - message(" ✓ Using per-field mosaic architecture (1 TIFF per field)") - - # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif - field_grid <- list( - mosaic_dir = mosaic_dir, - mode = "per-field" - ) - - message("\nUsing modular RDS-based approach for weekly statistics...") - - # Load/calculate CURRENT week stats (from tiles or RDS cache) - message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...") - current_stats <- load_or_calculate_weekly_stats( - week_num = current_week, - year = current_year, + # Call the main orchestrator function from kpi_calculation_utils.R + workflow_results <- calculate_field_analysis_cane_supply( + setup = setup, + client_config = client_config, + end_date = end_date, project_dir = project_dir, + weekly_mosaic = weekly_mosaic, + daily_vals_dir = daily_vals_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = field_grid$mosaic_dir, - reports_dir = reports_dir, - report_date = end_date + data_dir = data_dir ) - - message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week")) - - # Load/calculate PREVIOUS week stats (from RDS cache or tiles) - message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") - - # Calculate report date for previous week (7 days before current) - prev_report_date <- end_date - 7 - - prev_stats <- load_or_calculate_weekly_stats( - week_num = previous_week, - year = previous_year, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - mosaic_dir = field_grid$mosaic_dir, - reports_dir = reports_dir, - report_date = prev_report_date - ) - - message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) - message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", "))) - message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV)))) - - # Apply trend calculations (requires both weeks) - message("\n3. Calculating trend columns...") - current_stats <- calculate_kpi_trends(current_stats, prev_stats, - project_dir = project_dir, - reports_dir = reports_dir, - current_week = current_week, - year = current_year) - - message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) - - # Load weekly harvest probabilities from script 31 (if available) - # Note: Script 31 saves to reports/kpis/field_stats/ (not field_level) - message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") - harvest_prob_file <- file.path(harvest_prob_dir, - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) - message(paste(" Looking for:", harvest_prob_file)) - - imminent_prob_data <- tryCatch({ - if (file.exists(harvest_prob_file)) { - prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE, - col_types = readr::cols(.default = readr::col_character())) - message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) - prob_df %>% - select(field, imminent_prob, detected_prob) %>% - rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) - } else { - message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) - NULL - } - }, error = function(e) { - message(paste(" WARNING: Could not load harvest probabilities:", e$message)) - NULL - }) - - # ============================================================================ - # CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R) - # ============================================================================ - - message("\nCalculating gap filling scores (2σ method)...") - - # Process per-field mosaics - message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) - - field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) - - process_gap_for_field <- function(field_file) { - field_id <- basename(dirname(field_file)) - field_bounds <- field_boundaries_by_id[[field_id]] - - if (is.null(field_bounds) || nrow(field_bounds) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - - tryCatch({ - field_raster <- terra::rast(field_file) - ci_band_name <- "CI" - if (!(ci_band_name %in% names(field_raster))) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - field_ci_band <- field_raster[[ci_band_name]] - names(field_ci_band) <- "CI" - - gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) - - if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - - gap_scores <- gap_result$field_results - gap_scores$Field_id <- gap_scores$field - gap_scores <- gap_scores[, c("Field_id", "gap_score")] - - stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) - }, error = function(e) { - message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) - data.frame(Field_id = field_id, gap_score = NA_real_) - }) - } - - # Process fields sequentially with progress bar - message(" Processing gap scores for ", length(per_field_files), " fields...") - pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) - results_list <- lapply(seq_along(per_field_files), function(idx) { - result <- process_gap_for_field(per_field_files[[idx]]) - utils::setTxtProgressBar(pb, idx) - result - }) - close(pb) - - gap_scores_df <- dplyr::bind_rows(results_list) - - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - gap_scores_df <- gap_scores_df %>% - dplyr::group_by(Field_id) %>% - dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") - - message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) - message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) - } else { - message(" WARNING: No gap scores calculated from per-field mosaics") - gap_scores_df <- NULL - } - - # ============================================================================ - # Build final output dataframe with all 22 columns (including Gap_score) - # ============================================================================ - - message("\nBuilding final field analysis output...") - - # Pre-calculate acreages with geometry validation - # This avoids geometry errors during field_analysis construction - acreage_lookup <- tryCatch({ - lookup_df <- field_boundaries_sf %>% - sf::st_drop_geometry() %>% - as.data.frame() %>% - mutate( - geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { - tryCatch({ - sf::st_is_valid(field_boundaries_sf[idx, ]) - }, error = function(e) FALSE) - }), - area_ha = 0 - ) + # Extract results + field_analysis_df <- workflow_results$field_analysis_df + farm_kpi_results <- workflow_results$farm_kpi_results + export_paths <- workflow_results$export_paths - # Calculate area for valid geometries - for (idx in which(lookup_df$geometry_valid)) { - tryCatch({ - area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) - lookup_df$area_ha[idx] <- area_m2 / 10000 - }, error = function(e) { - lookup_df$area_ha[idx] <<- NA_real_ - }) - } - - # Convert hectares to acres - lookup_df %>% - mutate(acreage = area_ha / 0.404686) %>% - select(field, acreage) - }, error = function(e) { - message(paste("Warning: Could not calculate acreages from geometries -", e$message)) - data.frame(field = character(0), acreage = numeric(0)) - }) - - field_analysis_df <- current_stats %>% - mutate( - # Column 2: Farm_Section (user fills) - Farm_Section = NA_character_, - # Column 3: Field_name (from GeoJSON - already have Field_id, can look up) - Field_name = Field_id, - # Column 4: Acreage (calculate from geometry) - Acreage = { - acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] - if_else(is.na(acreages_vec), 0, acreages_vec) - }, - # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change) - # Column 7: Four_week_trend (from current_stats) - # Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start) - Last_harvest_or_planting_date = { - planting_dates$planting_date[match(Field_id, planting_dates$field_id)] - }, - # Column 9: Age_week (calculated from report date and planting date) - Age_week = { - sapply(seq_len(nrow(current_stats)), function(idx) { - planting_dt <- Last_harvest_or_planting_date[idx] - if (is.na(planting_dt)) { - return(NA_real_) - } - round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0) - }) - }, - # Column 10: Phase (recalculate based on updated Age_week) - Phase = { - sapply(Age_week, function(age) { - if (is.na(age)) return(NA_character_) - if (age >= 0 & age < 4) return("Germination") - if (age >= 4 & age < 17) return("Tillering") - if (age >= 17 & age < 39) return("Grand Growth") - if (age >= 39) return("Maturation") - NA_character_ - }) - }, - # Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends) - # Column 12: Germination_progress (calculated here from CI values) - # Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% - Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") - }), - # Column 13: Imminent_prob (from script 31 or NA if not available) - Imminent_prob = { - if (!is.null(imminent_prob_data)) { - imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] - } else { - rep(NA_real_, nrow(current_stats)) - } - }, - # Column 14: Status_Alert (based on harvest probability + crop health status) - # Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA - Status_Alert = { - sapply(seq_len(nrow(current_stats)), function(idx) { - imminent_prob <- Imminent_prob[idx] - age_w <- Age_week[idx] - weekly_ci_chg <- Weekly_ci_change[idx] - mean_ci_val <- Mean_CI[idx] - - # Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months) - if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) { - return("Ready for harvest-check") - } - - # Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5) - if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) { - return("Strong decline in crop health") - } - - # Priority 3: Harvested/bare (Mean CI < 1.5) - if (!is.na(mean_ci_val) && mean_ci_val < 1.5) { - return("Harvested/bare") - } - - # Fallback: no alert - NA_character_ - }) - }, - # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles) - # Column 17: Already in current_stats (CV) - # Column 18: Already in current_stats (CV_Trend_Short_Term) - # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value) - # Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope) - # 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01) - CV_Trend_Long_Term_Category = { - sapply(current_stats$CV_Trend_Long_Term, function(slope) { - if (is.na(slope)) { - return(NA_character_) - } else if (slope < -0.01) { - return("More uniform") - } else if (slope > 0.01) { - return("Less uniform") - } else { - return("Stable uniformity") - } - }) - }, - # Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category) - # Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% - Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") - }), - # Column 22: Gap_score (2σ below median - from kpi_utils.R) - Gap_score = { - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - # Debug: Print first few gap scores - message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)", - nrow(gap_scores_df), - paste(head(gap_scores_df$gap_score, 3), collapse=", "))) - message(sprintf(" First 3 Field_ids in gap_scores_df: %s", - paste(head(gap_scores_df$Field_id, 3), collapse=", "))) - message(sprintf(" First 3 Field_ids in current_stats: %s", - paste(head(current_stats$Field_id, 3), collapse=", "))) - - gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] - } else { - rep(NA_real_, nrow(current_stats)) - } - } - ) %>% - select( - all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", - "Last_harvest_or_planting_date", "Age_week", "Phase", - "Germination_progress", - "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", - "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", - "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) - ) - - message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)")) - - export_paths <- export_field_analysis_excel( - field_analysis_df, - NULL, - project_dir, - current_week, - current_year, - reports_dir - ) - - cat("\n--- Per-field Results (first 10) ---\n") - available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") - available_cols <- available_cols[available_cols %in% names(field_analysis_df)] - if (length(available_cols) > 0) { - print(head(field_analysis_df[, available_cols], 10)) - } - - # ========== FARM-LEVEL KPI AGGREGATION ========== - # Aggregate the per-field analysis into farm-level summary statistics - - cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") - - # Filter to only fields that have actual data (non-NA CI and valid acreage) - field_data <- field_analysis_df %>% - filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% - filter(Acreage > 0) - - if (nrow(field_data) > 0) { - - if (nrow(field_data) > 0) { - # Create summary statistics - farm_summary <- list() - - # 1. PHASE DISTRIBUTION - phase_dist <- field_data %>% - group_by(Phase) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Phase) - - farm_summary$phase_distribution <- phase_dist - - # 2. STATUS ALERT DISTRIBUTION - status_dist <- field_data %>% - group_by(Status_Alert) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Status_Alert) - - farm_summary$status_distribution <- status_dist - - # 3. CLOUD COVERAGE DISTRIBUTION - cloud_dist <- field_data %>% - group_by(Cloud_category) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Cloud_category) - - farm_summary$cloud_distribution <- cloud_dist - - # 4. OVERALL STATISTICS - farm_summary$overall_stats <- data.frame( - total_fields = nrow(field_data), - total_acreage = sum(field_data$Acreage, na.rm = TRUE), - mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), - median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), - mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4), - week = current_week, - year = current_year, - date = as.character(end_date) - ) - - # Print summaries - cat("\n--- PHASE DISTRIBUTION ---\n") - print(phase_dist) - - cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") - print(status_dist) - - cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") - print(cloud_dist) - - cat("\n--- OVERALL FARM STATISTICS ---\n") - print(farm_summary$overall_stats) - - farm_kpi_results <- farm_summary - } else { - farm_kpi_results <- NULL - } - } else { - farm_kpi_results <- NULL - } - - # ========== FINAL SUMMARY ========== - - cat("\n", strrep("=", 70), "\n") - cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n") - cat(strrep("=", 70), "\n") - cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n") - cat("Excel export:", export_paths$excel, "\n") - cat("RDS export:", export_paths$rds, "\n") - cat("CSV export:", export_paths$csv, "\n") - - if (!is.null(farm_kpi_results)) { - cat("\nFarm-level KPIs: CALCULATED\n") - } else { - cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n") - } - - cat("\n✓ Consolidated KPI calculation complete!\n") - cat(" - Per-field data exported\n") - cat(" - Farm-level KPIs calculated\n") - cat(" - All outputs in:", reports_dir, "\n\n") - - } else { - # Unknown client type - log warning and exit - warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) + } else { + # Unknown client type - log warning and exit + warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "") cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n") cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n") diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index df6e319..bd06aa8 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -31,169 +31,710 @@ library(writexl) # ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section) # ============================================================================ -#' Placeholder: ANGATA harvest readiness assessment +#' Calculate acreage for each field from geometry +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with field and acreage columns +calculate_field_acreages <- function(field_boundaries_sf) { + tryCatch({ + lookup_df <- field_boundaries_sf %>% + sf::st_drop_geometry() %>% + as.data.frame() %>% + mutate( + geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { + tryCatch({ + sf::st_is_valid(field_boundaries_sf[idx, ]) + }, error = function(e) FALSE) + }), + area_ha = 0 + ) + + # Calculate area for valid geometries + for (idx in which(lookup_df$geometry_valid)) { + tryCatch({ + area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) + lookup_df$area_ha[idx] <- area_m2 / 10000 + }, error = function(e) { + lookup_df$area_ha[idx] <<- NA_real_ + }) + } + + # Convert hectares to acres + lookup_df %>% + mutate(acreage = area_ha / 0.404686) %>% + select(field, acreage) + }, error = function(e) { + message(paste("Warning: Could not calculate acreages from geometries -", e$message)) + data.frame(field = character(0), acreage = numeric(0)) + }) +} + +#' Calculate age in weeks from planting date #' -#' Future implementation will integrate ANGATA-specific harvest readiness criteria: -#' - Maturation phase detection (CI threshold-based) -#' - Field age tracking (days since planting) -#' - Weather-based ripeness indicators (if available) -#' - Historical yield correlations -#' -#' @param field_ci CI values for the field -#' @param field_age_days Days since planting -#' -#' @return Character string with harvest readiness assessment -assess_harvest_readiness <- function(field_ci, field_age_days = NULL) { - # Placeholder implementation - # Real version would check: - # - Mean CI > 3.5 (maturation threshold) - # - Age > 350 days - # - Weekly growth rate < threshold (mature plateau) - - if (is.null(field_ci) || all(is.na(field_ci))) { - return("No data available") +#' @param planting_date Date of planting +#' @param reference_date Date to calculate age relative to (typically end_date) +#' @return Numeric age in weeks (rounded to nearest week) +calculate_age_week <- function(planting_date, reference_date) { + if (is.na(planting_date)) { + return(NA_real_) } - - mean_ci <- mean(field_ci, na.rm = TRUE) - - if (mean_ci > 3.5) { - return("Ready for harvest") - } else if (mean_ci > 2.5) { - return("Approaching harvest readiness") + round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0) +} + +#' Assign crop phase based on age in weeks +#' +#' @param age_week Numeric age in weeks +#' @return Character phase name +calculate_phase <- function(age_week) { + if (is.na(age_week)) return(NA_character_) + if (age_week >= 0 & age_week < 4) return("Germination") + if (age_week >= 4 & age_week < 17) return("Tillering") + if (age_week >= 17 & age_week < 39) return("Grand Growth") + if (age_week >= 39) return("Maturation") + NA_character_ +} + +#' Bin percentage into 10% intervals with special handling for 90-100% +#' +#' @param pct Numeric percentage value (0-100) +#' @return Character bin label +bin_percentage <- function(pct) { + if (is.na(pct)) return(NA_character_) + if (pct >= 95) return("95-100%") + else if (pct >= 90) return("90-95%") + else if (pct >= 80) return("80-90%") + else if (pct >= 70) return("70-80%") + else if (pct >= 60) return("60-70%") + else if (pct >= 50) return("50-60%") + else if (pct >= 40) return("40-50%") + else if (pct >= 30) return("30-40%") + else if (pct >= 20) return("20-30%") + else if (pct >= 10) return("10-20%") + else return("0-10%") +} + +#' Calculate germination progress from CI threshold percentage +#' +#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2 +#' @return Character bin label +calculate_germination_progress <- function(pct_pixels_ci_gte_2) { + bin_percentage(pct_pixels_ci_gte_2) +} + +#' Categorize CV trend (long-term slope) into qualitative labels +#' +#' @param cv_slope Numeric slope from CV trend analysis +#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform" +categorize_cv_trend_long_term <- function(cv_slope) { + if (is.na(cv_slope)) { + return(NA_character_) + } else if (cv_slope < -0.01) { + return("More uniform") + } else if (cv_slope > 0.01) { + return("Less uniform") } else { - return("Not ready - continue monitoring") + return("Stable uniformity") } } -#' Placeholder: ANGATA supply chain status flags -#' -#' Future implementation will add supply chain-specific status indicators: -#' - Harvest scheduling readiness -#' - Equipment availability impact -#' - Transportation/logistics flags -#' - Quality parameter flags -#' -#' @param field_analysis Data frame with field analysis results -#' -#' @return Data frame with supply chain status columns -assess_supply_chain_status <- function(field_analysis) { - # Placeholder: return field analysis as-is - # Real version would add columns for: - # - schedule_ready (bool) - # - harvest_window_days (numeric) - # - transportation_priority (char) - # - quality_flags (char) +#' Determine status alert based on harvest probability and crop health +#' Priority order: +#' 1. Ready for harvest-check (imminent + mature ≥12 months) +#' 2. Strong decline in crop health (drop ≥2 points but still >1.5) +#' 3. Harvested/bare (Mean CI < 1.5) +#' @param imminent_prob Numeric harvest probability +#' @param age_week Numeric age in weeks +#' @param weekly_ci_change Numeric weekly CI change +#' @param mean_ci Numeric mean CI value +#' @return Character status alert or NA +calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, mean_ci) { + # Priority 1: Ready for harvest-check + if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) { + return("Ready for harvest-check") + } - return(field_analysis) + # Priority 2: Strong decline + if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) { + return("Strong decline in crop health") + } + + # Priority 3: Harvested/bare + if (!is.na(mean_ci) && mean_ci < 1.5) { + return("Harvested/bare") + } + + # Fallback: no alert + NA_character_ } +#' Calculate Gap Filling Score KPI (2σ method) +#' @param ci_raster Current week CI raster +#' @param field_boundaries Field boundaries +#' @return Data frame with field-level gap filling scores +calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { + safe_log("Calculating Gap Filling Score KPI (placeholder)") + + # Handle both sf and SpatVector inputs + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries_vect <- terra::vect(field_boundaries) + } else { + field_boundaries_vect <- field_boundaries + } + + # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions + n_fields_vect <- length(field_boundaries_vect) + n_fields_sf <- nrow(field_boundaries) + + if (n_fields_sf != n_fields_vect) { + warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) + } + + field_results <- data.frame() + + for (i in seq_len(nrow(field_boundaries))) { + field_name <- field_boundaries$field[i] + sub_field_name <- field_boundaries$sub_field[i] + field_vect <- field_boundaries_vect[i] + + # Extract CI values using helper function + ci_values <- extract_ci_values(ci_raster, field_vect) + valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] + + if (length(valid_values) > 1) { + # Gap score using 2σ below median to detect outliers + median_ci <- median(valid_values) + sd_ci <- sd(valid_values) + outlier_threshold <- median_ci - (2 * sd_ci) + low_ci_pixels <- sum(valid_values < outlier_threshold) + total_pixels <- length(valid_values) + gap_score <- (low_ci_pixels / total_pixels) * 100 + + # Classify gap severity + gap_level <- dplyr::case_when( + gap_score < 10 ~ "Minimal", + gap_score < 25 ~ "Moderate", + TRUE ~ "Significant" + ) + + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + gap_level = gap_level, + gap_score = gap_score, + mean_ci = mean(valid_values), + outlier_threshold = outlier_threshold + )) + } else { + # Not enough valid data, fill with NA row + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + gap_level = NA_character_, + gap_score = NA_real_, + mean_ci = NA_real_, + outlier_threshold = NA_real_ + )) + } + } + return(list(field_results = field_results)) +} + + +#' Calculate gap filling scores for all per-field mosaics +#' This is a wrapper function that processes multiple per-field mosaic files +#' and calculates gap scores for each field. +#' @param per_field_files Character vector of paths to per-field mosaic TIFFs +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with Field_id and gap_score columns +calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { + message("\nCalculating gap filling scores (2σ method)...") + message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) + + field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) + + process_gap_for_field <- function(field_file) { + field_id <- basename(dirname(field_file)) + field_bounds <- field_boundaries_by_id[[field_id]] + + if (is.null(field_bounds) || nrow(field_bounds) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + tryCatch({ + field_raster <- terra::rast(field_file) + ci_band_name <- "CI" + if (!(ci_band_name %in% names(field_raster))) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + field_ci_band <- field_raster[[ci_band_name]] + names(field_ci_band) <- "CI" + + gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) + + if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + gap_scores <- gap_result$field_results + gap_scores$Field_id <- gap_scores$field + gap_scores <- gap_scores[, c("Field_id", "gap_score")] + + stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) + }, error = function(e) { + message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) + data.frame(Field_id = field_id, gap_score = NA_real_) + }) + } + + # Process fields sequentially with progress bar + message(" Processing gap scores for ", length(per_field_files), " fields...") + pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) + + results_list <- lapply(seq_along(per_field_files), function(idx) { + result <- process_gap_for_field(per_field_files[[idx]]) + utils::setTxtProgressBar(pb, idx) + result + }) + close(pb) + + gap_scores_df <- dplyr::bind_rows(results_list) + + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df <- gap_scores_df %>% + dplyr::group_by(Field_id) %>% + dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") + + message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) + message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", + round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) + } else { + message(" WARNING: No gap scores calculated from per-field mosaics") + gap_scores_df <- NULL + } + + return(gap_scores_df) +} + +#' Build complete per-field KPI dataframe with all 22 columns +#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats +#' @param planting_dates data.frame with field_id and planting_date columns +#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL) +#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL) +#' @param field_boundaries_sf sf object with field geometries +#' @param end_date Date object for current report date +#' @return data.frame with all 22 KPI columns +calculate_all_field_kpis <- function(current_stats, + planting_dates, + imminent_prob_data, + gap_scores_df, + field_boundaries_sf, + end_date) { + + message("\nBuilding final field analysis output...") + + # Pre-calculate acreages + acreage_lookup <- calculate_field_acreages(field_boundaries_sf) + + field_analysis_df <- current_stats %>% + mutate( + # Column 2: Farm_Section (user fills manually) + Farm_Section = NA_character_, + + # Column 3: Field_name (from GeoJSON) + Field_name = Field_id, + + # Column 4: Acreage (from geometry) + Acreage = { + acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] + if_else(is.na(acreages_vec), 0, acreages_vec) + }, + + # Column 8: Last_harvest_or_planting_date (from harvest.xlsx) + Last_harvest_or_planting_date = { + planting_dates$planting_date[match(Field_id, planting_dates$field_id)] + }, + + # Column 9: Age_week (calculated) + Age_week = { + sapply(seq_len(nrow(current_stats)), function(idx) { + calculate_age_week(Last_harvest_or_planting_date[idx], end_date) + }) + }, + + # Column 10: Phase (based on Age_week) + Phase = sapply(Age_week, calculate_phase), + + # Column 12: Germination_progress (binned Pct_pixels_CI_gte_2) + Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress), + + # Column 13: Imminent_prob (from script 31 or NA) + Imminent_prob = { + if (!is.null(imminent_prob_data)) { + imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + }, + + # Column 14: Status_Alert (multi-priority logic) + Status_Alert = { + sapply(seq_len(nrow(current_stats)), function(idx) { + calculate_status_alert( + Imminent_prob[idx], + Age_week[idx], + Weekly_ci_change[idx], + Mean_CI[idx] + ) + }) + }, + + # Column 19b: CV_Trend_Long_Term_Category (categorical slope) + CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term), + + # Column 21: Cloud_pct_clear (binned into intervals) + Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage), + + # Column 22: Gap_score (2σ method) + Gap_score = { + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + } + ) %>% + select( + all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", + "Last_harvest_or_planting_date", "Age_week", "Phase", + "Germination_progress", + "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", + "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", + "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) + ) + + message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns")) + + return(field_analysis_df) +} + +#' Aggregate per-field data into farm-level KPI summary +#' +#' @param field_analysis_df data.frame with per-field KPI data +#' @param current_week Numeric current week number +#' @param current_year Numeric current year +#' @param end_date Date object for current report date +#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats +calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) { + + cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") + + # Filter to only fields with actual data + field_data <- field_analysis_df %>% + filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% + filter(Acreage > 0) + + if (nrow(field_data) == 0) { + message("No valid field data for farm-level aggregation") + return(NULL) + } + + farm_summary <- list() + + # 1. PHASE DISTRIBUTION + phase_dist <- field_data %>% + group_by(Phase) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Phase) + + farm_summary$phase_distribution <- phase_dist + + # 2. STATUS ALERT DISTRIBUTION + status_dist <- field_data %>% + group_by(Status_Alert) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Status_Alert) + + farm_summary$status_distribution <- status_dist + + # 3. CLOUD COVERAGE DISTRIBUTION + cloud_dist <- field_data %>% + group_by(Cloud_category) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Cloud_category) + + farm_summary$cloud_distribution <- cloud_dist + + # 4. OVERALL STATISTICS + farm_summary$overall_stats <- data.frame( + total_fields = nrow(field_data), + total_acreage = sum(field_data$Acreage, na.rm = TRUE), + mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), + median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), + mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4), + week = current_week, + year = current_year, + date = as.character(end_date) + ) + + # Print summaries + cat("\n--- PHASE DISTRIBUTION ---\n") + print(phase_dist) + + cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") + print(status_dist) + + cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") + print(cloud_dist) + + cat("\n--- OVERALL FARM STATISTICS ---\n") + print(farm_summary$overall_stats) + + return(farm_summary) +} + + # ============================================================================ # ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS # ============================================================================ -#' Orchestrate ANGATA weekly field analysis and reporting +#' Main orchestrator for CANE_SUPPLY per-field KPI workflow #' -#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows. -#' Currently uses common utilities; future versions will add client-specific logic. +#' This function coordinates all KPI calculations for the per-field analysis workflow. +#' It loads historical data, calculates current/previous week statistics, computes +#' all 22 KPI columns, and aggregates farm-level summaries. #' -#' @param field_boundaries_sf SF object with field geometries -#' @param current_week ISO week number (1-53) -#' @param current_year ISO week year -#' @param mosaic_dir Directory containing weekly mosaics -#' @param field_boundaries_path Path to field GeoJSON -#' @param harvesting_data Data frame with harvest data (optional) -#' @param output_dir Directory for exports -#' @param data_dir Base data directory -#' -#' @return List with field analysis results -#' -#' @details -#' This function: -#' 1. Loads weekly mosaic and extracts field statistics -#' 2. Calculates field statistics (using common utilities) -#' 3. Prepares field analysis summary -#' 4. Exports to Excel/CSV/RDS -#' 5. (Future) Applies ANGATA-specific assessments -#' -calculate_field_analysis_cane_supply <- function( - field_boundaries_sf, - current_week, - current_year, - mosaic_dir, - field_boundaries_path = NULL, - harvesting_data = NULL, - output_dir = file.path(PROJECT_DIR, "output"), - data_dir = NULL -) { +#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.) +#' @param client_config List with workflow configuration (script_91_compatible, outputs) +#' @param end_date Date object for current report date +#' @param project_dir Character project identifier +#' @param weekly_mosaic Character path to weekly mosaic directory +#' @param daily_vals_dir Character path to daily values directory +#' @param field_boundaries_sf sf object with field geometries +#' @param data_dir Character path to data directory +#' @return List with field_analysis_df, farm_kpi_results, export_paths +calculate_field_analysis_cane_supply <- function(setup, + client_config, + end_date, + project_dir, + weekly_mosaic, + daily_vals_dir, + field_boundaries_sf, + data_dir) { - message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============") + message("\n", strrep("=", 70)) + message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") + message(strrep("=", 70)) - # Load current week mosaic - message("Loading current week mosaic...") - current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year) + reports_dir <- setup$kpi_reports_dir - if (is.null(current_mosaic)) { - warning(paste("Could not load current week mosaic for week", current_week, current_year)) - return(NULL) + # ========== PHASE 1: WEEKLY ANALYSIS SETUP ========== + message("\n", strrep("-", 70)) + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") + message(strrep("-", 70)) + + weeks <- calculate_week_numbers(end_date) + current_week <- weeks$current_week + current_year <- weeks$current_year + previous_week <- weeks$previous_week + previous_year <- weeks$previous_year + + message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year)) + + # Find per-field weekly mosaics + message("Finding per-field weekly mosaics...") + + if (!dir.exists(weekly_mosaic)) { + stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, + "\nScript 40 (mosaic creation) must be run first.")) } - # Extract field statistics - message("Extracting field statistics from current mosaic...") - field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) + field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] - # Load previous week stats for comparison - message("Loading historical data for trends...") - target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) - previous_stats <- NULL - - previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year) - if (!is.null(previous_mosaic)) { - previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) + if (length(field_dirs) == 0) { + stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, + "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) } - # Calculate 4-week historical trend - message("Calculating field trends...") - ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds") + # Verify we have mosaics for this week + single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) + per_field_files <- c() + for (field in field_dirs) { + field_mosaic_dir <- file.path(weekly_mosaic, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files <- c(per_field_files, files) + } + } - field_analysis <- calculate_field_statistics( - field_stats = field_stats, - previous_stats = previous_stats, + if (length(per_field_files) == 0) { + stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year, + "\nExpected pattern:", single_file_pattern, + "\nChecked:", weekly_mosaic)) + } + + message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) + + # ========== PHASE 2: LOAD HISTORICAL DATA ========== + message("\nLoading historical field data for trend calculations...") + num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) + message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data...")) + + allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) + + historical_data <- load_historical_field_data( + project_dir, current_week, current_year, reports_dir, + num_weeks = num_weeks_to_load, + auto_generate = allow_auto_gen, + field_boundaries_sf = field_boundaries_sf, + daily_vals_dir = daily_vals_dir + ) + + # ========== PHASE 3: LOAD PLANTING DATES ========== + message("\nLoading harvest data from harvest.xlsx for planting dates...") + harvest_file_path <- file.path(data_dir, "harvest.xlsx") + + harvesting_data <- tryCatch({ + if (file.exists(harvest_file_path)) { + harvest_raw <- readxl::read_excel(harvest_file_path) + harvest_raw$season_start <- as.Date(harvest_raw$season_start) + harvest_raw$season_end <- as.Date(harvest_raw$season_end) + message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) + harvest_raw + } else { + message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) + NULL + } + }, error = function(e) { + message(paste(" ERROR loading harvest.xlsx:", e$message)) + NULL + }) + + planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) + + if (is.null(planting_dates) || nrow(planting_dates) == 0) { + message("WARNING: No planting dates available. Using NA for all fields.") + planting_dates <- data.frame( + field_id = field_boundaries_sf$field, + planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), + stringsAsFactors = FALSE + ) + } + + # ========== PHASE 4: CALCULATE WEEKLY STATISTICS ========== + message("\nUsing modular RDS-based approach for weekly statistics...") + + # Current week + message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...") + current_stats <- load_or_calculate_weekly_stats( week_num = current_week, year = current_year, - ci_rds_path = ci_rds_path, + project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - harvesting_data = harvesting_data + mosaic_dir = weekly_mosaic, + reports_dir = reports_dir, + report_date = end_date + ) + message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week")) + + # Previous week + message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") + prev_report_date <- end_date - 7 + + prev_stats <- load_or_calculate_weekly_stats( + week_num = previous_week, + year = previous_year, + project_dir = project_dir, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = weekly_mosaic, + reports_dir = reports_dir, + report_date = prev_report_date + ) + message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) + + # ========== PHASE 5: CALCULATE TRENDS ========== + message("\n3. Calculating trend columns...") + current_stats <- calculate_kpi_trends( + current_stats, prev_stats, + project_dir = project_dir, + reports_dir = reports_dir, + current_week = current_week, + year = current_year + ) + message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) + + # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== + message("\n4. Loading harvest probabilities from script 31...") + harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") + harvest_prob_file <- file.path(harvest_prob_dir, + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) + message(paste(" Looking for:", harvest_prob_file)) + + imminent_prob_data <- tryCatch({ + if (file.exists(harvest_prob_file)) { + prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE, + col_types = readr::cols(.default = readr::col_character())) + message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) + prob_df %>% + select(field, imminent_prob, detected_prob) %>% + rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) + } else { + message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) + NULL + } + }, error = function(e) { + message(paste(" WARNING: Could not load harvest probabilities:", e$message)) + NULL + }) + + # ========== PHASE 7: CALCULATE GAP SCORES ========== + gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf) + + # ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ========== + field_analysis_df <- calculate_all_field_kpis( + current_stats = current_stats, + planting_dates = planting_dates, + imminent_prob_data = imminent_prob_data, + gap_scores_df = gap_scores_df, + field_boundaries_sf = field_boundaries_sf, + end_date = end_date ) - if (is.null(field_analysis)) { - message("Could not generate field analysis") - return(NULL) - } - - # Generate summary - message("Generating summary statistics...") - summary_df <- generate_field_analysis_summary(field_analysis) - - # Export - message("Exporting field analysis...") + # ========== PHASE 9: EXPORT PER-FIELD RESULTS ========== export_paths <- export_field_analysis_excel( - field_analysis, - summary_df, - PROJECT_DIR, + field_analysis_df, + NULL, + project_dir, current_week, current_year, - output_dir + reports_dir ) - message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n")) + # cat("\n--- Per-field Results (first 10) ---\n") + # available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") + # available_cols <- available_cols[available_cols %in% names(field_analysis_df)] + # if (length(available_cols) > 0) { + # print(head(field_analysis_df[, available_cols], 10)) + # } - result <- list( - field_analysis = field_analysis, - summary = summary_df, - exports = export_paths - ) - - return(result) + # # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ========== + # farm_kpi_results <- calculate_farm_level_kpis( + # field_analysis_df, + # current_week, + # current_year, + # end_date + # ) } # ============================================================================