# 09_FIELD_ANALYSIS_WEEKLY.R # ========================== # Per-field weekly analysis with phase detection and status triggers # Generates detailed field-level CSV export with: # - Field identifiers and areas # - Weekly CI change (mean ± std) # - Age-based phase assignment (Germination, Tillering, Grand Growth, Maturation) # - Harvest imminence detection (Phase 1 from LSTM model) # - Status triggers (non-exclusive, can coexist with harvest imminent phase) # - Phase transition tracking (weeks in current phase) # - Cloud coverage analysis from 8-band satellite data (band 9 = cloud mask) # # Harvest Imminence: # - Runs LSTM Phase 1 detection to get imminent_prob (probability harvest in next ~4 weeks) # - If imminent_prob > 0.5, phase = "Harvest Imminent" (overrides age-based phase) # - Weekly predictions exported to separate Excel for tracking # # Cloud Coverage Categories (from band 9: 1=clear, 0=cloudy): # - Clear view: >=99.5% clear pixels (100% practical coverage) # - Partial coverage: 0-99.5% clear pixels (some cloud interference) # - No image available: 0% clear pixels (completely clouded) # # Output: # - Excel (.xlsx) with Field Data sheet and Summary sheet # - Excel (.xlsx) weekly harvest predictions for tracking # - RDS file with field_analysis and field_analysis_summary for Rmd reports # - Summary includes: Monitored area, Cloud coverage, Phase distribution, Status triggers # # Usage: Rscript 09_field_analysis_weekly.R [end_date] [offset] [project_dir] # - end_date: End date for analysis (YYYY-MM-DD format), default: today # - offset: Number of days to look back (for consistency, not currently used) # - project_dir: Project directory name (e.g., "aura", "esa", "angata") # 1. Load required libraries suppressPackageStartupMessages({ library(here) library(sf) library(terra) library(dplyr) library(tidyr) library(lubridate) library(readr) library(readxl) library(writexl) # Optional: torch for harvest model inference (will skip if not available) tryCatch({ library(torch) }, error = function(e) { message("Note: torch package not available - harvest model inference will be skipped") }) }) # ============================================================================ # PHASE AND STATUS TRIGGER DEFINITIONS # ============================================================================ PHASE_DEFINITIONS <- data.frame( phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), age_start = c(0, 4, 17, 39), age_end = c(6, 16, 39, 200), stringsAsFactors = FALSE ) STATUS_TRIGGERS <- data.frame( trigger = c( "germination_started", "germination_complete", "stress_detected_whole_field", "strong_recovery", "growth_on_track", "maturation_progressing", "harvest_ready" ), age_min = c(0, 0, NA, NA, 4, 39, 45), age_max = c(6, 6, NA, NA, 39, 200, 200), description = c( "10% of field CI > 2", "70% of field CI >= 2", "CI decline > -1.5 + low CV", "CI increase > +1.5", "CI increasing consistently", "High CI, stable/declining", "Age 45+ weeks (ready to harvest)" ), stringsAsFactors = FALSE ) # ============================================================================ # HELPER FUNCTIONS # ============================================================================ get_phase_by_age <- function(age_weeks) { if (is.na(age_weeks)) return(NA_character_) for (i in seq_len(nrow(PHASE_DEFINITIONS))) { if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && age_weeks <= PHASE_DEFINITIONS$age_end[i]) { return(PHASE_DEFINITIONS$phase[i]) } } return("Unknown") } get_status_trigger <- function(ci_values, ci_change, age_weeks) { if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) ci_values <- ci_values[!is.na(ci_values)] if (length(ci_values) == 0) return(NA_character_) pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 mean_ci <- mean(ci_values, na.rm = TRUE) # Germination phase triggers (age 0-6) if (age_weeks >= 0 && age_weeks <= 6) { if (pct_at_or_above_2 >= 70) { return("germination_complete") } else if (pct_above_2 > 10) { return("germination_started") } } # Harvest ready (45+ weeks) - check first to prioritize if (age_weeks >= 45) { return("harvest_ready") } # Stress detection (any phase except Germination) if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { return("stress_detected_whole_field") } # Strong recovery (any phase except Germination) if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { return("strong_recovery") } # Growth on track (Tillering/Grand Growth, 4-39 weeks) if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { return("growth_on_track") } # Maturation progressing (39-45 weeks, high CI stable/declining) if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { return("maturation_progressing") } return(NA_character_) } load_previous_week_csv <- function(project_dir, current_week, reports_dir) { lookback_weeks <- c(1, 2, 3) for (lookback in lookback_weeks) { previous_week <- current_week - lookback if (previous_week < 1) previous_week <- previous_week + 52 csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", previous_week), ".csv") csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) if (file.exists(csv_path)) { tryCatch({ prev_data <- read.csv(csv_path, stringsAsFactors = FALSE) message(if (lookback == 1) paste("Loaded previous week CSV (week", previous_week, ")") else paste("Loaded fallback CSV from", lookback, "weeks ago (week", previous_week, ")")) return(list(data = prev_data, weeks_lookback = lookback, found = TRUE)) }, error = function(e) { message(paste("Warning: Could not read CSV from week", previous_week)) }) } } message("No previous field analysis CSV found. Phase tracking will be age-based only.") return(list(data = NULL, weeks_lookback = NA, found = FALSE)) } # ============================================================================ # UNIFORM AGE MODE - Using fixed planting date for all fields # TO SWITCH BACK: Set USE_UNIFORM_AGE <- FALSE and uncomment original code # ============================================================================ USE_UNIFORM_AGE <- TRUE UNIFORM_PLANTING_DATE <- as.Date("2025-01-01") # All fields planted this date extract_planting_dates <- function(harvesting_data) { if (USE_UNIFORM_AGE) { message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) return(data.frame( field_id = NA, # NA = apply to all fields planting_date = UNIFORM_PLANTING_DATE, stringsAsFactors = FALSE )) } # ORIGINAL CODE (commented out - uncomment when switching back to real harvest data): # if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { # message("Warning: No harvesting data available.") # return(NULL) # } # # tryCatch({ # # Get the MOST RECENT season_start for each field (most recent row per field) # planting_dates <- harvesting_data %>% # arrange(field, desc(season_start)) %>% # distinct(field, .keep_all = TRUE) %>% # select(field, season_start) %>% # rename(field_id = field, planting_date = season_start) %>% # filter(!is.na(planting_date)) %>% # as.data.frame() # # message(paste("Extracted planting dates for", nrow(planting_dates), "fields (most recent season)")) # return(planting_dates) # }, error = function(e) { # message(paste("Error extracting planting dates:", e$message)) # return(NULL) # }) } # ============================================================================ # CLOUD ANALYSIS FUNCTION # ============================================================================ calculate_field_cloud_coverage <- function(field_boundaries_sf, merged_tif_8b_dir, current_week, current_year) { message("Calculating per-field cloud coverage from weekly cloud mosaic...") # Check if cloud data directory exists if (!dir.exists(merged_tif_8b_dir)) { message(paste("Warning: Cloud data directory not found:", merged_tif_8b_dir)) return(NULL) } # Get all TIF files (format: YYYY-MM-DD.tif) tif_files <- list.files(merged_tif_8b_dir, pattern = "\\.tif$", full.names = TRUE) if (length(tif_files) == 0) { message("Warning: No 8-band TIF files found in cloud data directory") return(NULL) } # Extract dates from filenames and filter for current week file_dates <- as.Date(gsub("\\.tif$", "", basename(tif_files))) current_week_date <- as.Date(paste(current_year, "01-01", sep = "-")) + (current_week - 1) * 7 week_start <- current_week_date - as.numeric(format(current_week_date, "%w")) week_end <- week_start + 6 week_files <- tif_files[file_dates >= week_start & file_dates <= week_end] if (length(week_files) == 0) { message(paste("Warning: No 8-band files found for week", current_week)) return(NULL) } message(paste("Found", length(week_files), "cloud mask files for week", current_week)) # Step 1: Create weekly cloud mosaic using max aggregation (same as CI mosaic) message("Creating weekly cloud mosaic using max aggregation...") cloud_rasters <- list() for (i in seq_along(week_files)) { tryCatch({ r <- terra::rast(week_files[i]) # Band 9 is the cloud mask (1 = clear, 0 = cloudy) if (nlyr(r) < 9) { message(paste(" Warning: File has only", nlyr(r), "bands, need 9 (band 9 = cloud mask)")) next } cloud_band <- r[[9]] cloud_rasters[[i]] <- cloud_band }, error = function(e) { message(paste(" Error processing", basename(week_files[i]), ":", e$message)) }) } if (length(cloud_rasters) == 0) { message("Warning: No valid cloud mask data found") return(NULL) } # Create mosaic: use max value to prefer clear pixels (1 = clear, 0 = cloudy) if (length(cloud_rasters) == 1) { cloud_mosaic <- cloud_rasters[[1]] } else { # Filter out NULL entries cloud_rasters <- cloud_rasters[!sapply(cloud_rasters, is.null)] rsrc <- terra::sprc(cloud_rasters) cloud_mosaic <- terra::mosaic(rsrc, fun = "max") } message("Weekly cloud mosaic created") # Step 2: Extract cloud coverage per field from the mosaic message("Extracting cloud coverage per field...") cloud_summary <- data.frame( field_id = character(), pct_clear = numeric(), cloud_category = character(), stringsAsFactors = FALSE ) for (i in seq_len(nrow(field_boundaries_sf))) { tryCatch({ field_id <- field_boundaries_sf$field[i] field_geom <- terra::vect(sf::as_Spatial(field_boundaries_sf[i, ])) # Extract cloud values from mosaic (1 = clear, 0 = cloudy) cloud_vals <- terra::extract(cloud_mosaic, field_geom)[, 2] cloud_vals <- cloud_vals[!is.na(cloud_vals)] if (length(cloud_vals) > 0) { # Calculate % clear (value 1 = clear, 0 = cloudy) pct_clear <- (sum(cloud_vals == 1) / length(cloud_vals)) * 100 # Categorize: 100% = Clear, 0-99.9% = Partial, 0% = No Image if (pct_clear >= 99.5) { category <- "Clear view" } else if (pct_clear > 0 && pct_clear < 99.5) { category <- "Partial coverage" } else { category <- "No image available" } cloud_summary <- rbind(cloud_summary, data.frame( field_id = field_id, pct_clear = round(pct_clear, 1), cloud_category = category, stringsAsFactors = FALSE )) } }, error = function(e) { message(paste(" Error extracting cloud data for field", field_id, ":", e$message)) }) } message(paste("Cloud analysis complete for", nrow(cloud_summary), "fields")) return(cloud_summary) } #' Calculate per-field cloud coverage from weekly mosaic #' #' @param mosaic_raster The weekly CI mosaic raster (already cloud-masked) #' @param field_boundaries_sf Field boundaries as sf object #' @return Data frame with per-field cloud coverage (clear acres, % clear, category) #' calculate_cloud_coverage_from_mosaic <- function(mosaic_raster, field_boundaries_sf) { message("Calculating per-field cloud coverage from weekly mosaic...") if (is.null(mosaic_raster) || class(mosaic_raster)[1] != "SpatRaster") { message("Warning: Invalid mosaic raster provided") return(NULL) } # Extract CI band (last band) ci_band <- mosaic_raster[[nlyr(mosaic_raster)]] # Ensure CRS matches if (!terra::same.crs(ci_band, field_boundaries_sf)) { field_boundaries_sf <- sf::st_transform(field_boundaries_sf, sf::st_crs(ci_band)) } cloud_data <- data.frame( field_id = character(), sub_field = character(), clear_pixels = numeric(), total_pixels = numeric(), missing_pixels = numeric(), pct_clear = numeric(), cloud_category = character(), stringsAsFactors = FALSE ) # Process each field for (i in seq_len(nrow(field_boundaries_sf))) { field_id <- field_boundaries_sf$field[i] sub_field <- field_boundaries_sf$sub_field[i] # Extract pixel values from field using terra::extract (memory-efficient, native terra) extracted_vals <- tryCatch({ result <- terra::extract(ci_band, field_boundaries_sf[i, ], cells = TRUE, na.rm = FALSE) # terra::extract returns a data.frame with ID and value columns; extract values only if (is.data.frame(result) && nrow(result) > 0) { data.frame(value = result$value, cells = result$cell) } else { NULL } }, error = function(e) { NULL }) # Skip if extraction failed or returned empty if (is.null(extracted_vals) || !is.data.frame(extracted_vals) || nrow(extracted_vals) == 0) { next } # Count pixels: data vs missing ci_vals <- extracted_vals$value num_data <- sum(!is.na(ci_vals)) num_total <- length(ci_vals) num_missing <- num_total - num_data pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 # Categorize category <- if (pct_clear >= 85) "Clear view" else if (pct_clear > 0) "Partial coverage" else "No image available" # Store result cloud_data <- rbind(cloud_data, data.frame( field_id = field_id, sub_field = sub_field, clear_pixels = num_data, total_pixels = num_total, missing_pixels = num_missing, pct_clear = pct_clear, cloud_category = category, stringsAsFactors = FALSE )) } message(paste("Per-field cloud analysis complete for", nrow(cloud_data), "fields")) return(cloud_data) } # ============================================================================ # HARVEST IMMINENCE DETECTION FUNCTION # ============================================================================ calculate_harvest_imminence <- function(ci_data, field_boundaries_sf, harvest_model_dir, imminent_threshold = 0.5) { message("Calculating harvest imminence probabilities...") # Check if model files exist model_path <- file.path(harvest_model_dir, "model.pt") config_path <- file.path(harvest_model_dir, "config.json") scalers_path <- file.path(harvest_model_dir, "scalers.pkl") if (!file.exists(model_path) || !file.exists(config_path) || !file.exists(scalers_path)) { message("Warning: Harvest model files not found in", harvest_model_dir) return(NULL) } tryCatch({ # Load model using harvest_date_pred_utils # Note: This requires torch package and Python utilities, skip if not available harvest_utils_path <- file.path(harvest_model_dir, "harvest_date_pred_utils.R") if (!file.exists(harvest_utils_path)) { message("Note: Harvest utils R file not found, using age-based trigger only") return(NULL) } source(harvest_utils_path) # Load model, config, and scalers model <- torch::torch_load(model_path) model$eval() config <- jsonlite::read_json(config_path) scalers <- reticulate::py_load_object(scalers_path) # Run Phase 1 detection for each field harvest_results <- data.frame( field_id = character(), imminent_prob = numeric(), harvest_imminent = logical(), stringsAsFactors = FALSE ) for (field in unique(ci_data$field)) { tryCatch({ field_data <- ci_data %>% filter(field == !!field) %>% arrange(Date) if (nrow(field_data) < 10) { message(paste(" Skipping field", field, "- insufficient CI data")) next } # Extract CI values ci_vals <- field_data$value # Run Phase 1 (growing window detection with imminent probability) phase1_result <- run_phase1_growing_window( ci_values = ci_vals, scalers = scalers, config = config, model = model ) if (!is.null(phase1_result)) { imminent_prob <- phase1_result$imminent_prob harvest_imminent <- imminent_prob > imminent_threshold harvest_results <- rbind(harvest_results, data.frame( field_id = field, imminent_prob = round(imminent_prob, 3), harvest_imminent = harvest_imminent, stringsAsFactors = FALSE )) } }, error = function(e) { message(paste(" Error processing field", field, ":", e$message)) }) } message(paste("Harvest imminence detection complete for", nrow(harvest_results), "fields")) return(harvest_results) }, error = function(e) { message(paste("Error in harvest imminence calculation:", e$message)) return(NULL) }) } # ============================================================================ # MAIN CALCULATION FUNCTION # ============================================================================ calculate_per_field_analysis <- function(current_ci, previous_ci, field_boundaries_sf, previous_week_result = NULL, planting_dates = NULL, report_date = Sys.Date(), cloud_data = NULL, harvest_imminence_data = NULL) { message("Calculating per-field analysis...") # Debug CRS info message(paste(" Current CI CRS:", terra::crs(current_ci))) message(paste(" Field boundaries CRS:", sf::st_crs(field_boundaries_sf)$input)) # Debug mosaic info ci_ext <- terra::ext(current_ci) ci_summary <- terra::global(current_ci, "range", na.rm = TRUE) message(paste(" Mosaic extent:", paste(round(c(ci_ext$xmin, ci_ext$xmax, ci_ext$ymin, ci_ext$ymax), 4), collapse=", "))) message(paste(" Mosaic value range:", paste(round(as.numeric(ci_summary), 4), collapse=" to "))) previous_week_csv <- NULL weeks_lookback <- NA if (!is.null(previous_week_result) && previous_week_result$found) { previous_week_csv <- previous_week_result$data weeks_lookback <- previous_week_result$weeks_lookback } field_analysis <- list() field_count <- 0 for (i in seq_len(nrow(field_boundaries_sf))) { field_id <- field_boundaries_sf$field[i] farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) { field_boundaries_sf$sub_area[i] } else { NA_character_ } field_name <- field_boundaries_sf$field[i] # Check if geometry is valid and non-empty field_sf <- field_boundaries_sf[i, ] if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) { message(paste(" Skipping", field_id, "- invalid or empty geometry")) next } # Calculate field area from geometry (in hectares) tryCatch({ field_geom_calc <- terra::vect(sf::as_Spatial(field_sf)) terra::crs(field_geom_calc) <- terra::crs(current_ci) # Set CRS to match field_area_ha <- terra::expanse(field_geom_calc) / 10000 # Convert m² to hectares field_area_acres <- field_area_ha / 0.404686 # Convert to acres }, error = function(e) { message(paste(" Skipping", field_id, "- error calculating geometry:", e$message)) next }) # Extract current CI values with CRS validation tryCatch({ field_geom <- terra::vect(sf::as_Spatial(field_sf)) # Set CRS explicitly to match raster (as_Spatial loses CRS info) terra::crs(field_geom) <- terra::crs(current_ci) extract_result <- terra::extract(current_ci, field_geom) current_ci_vals <- extract_result[, 2] current_ci_vals <- current_ci_vals[!is.na(current_ci_vals)] }, error = function(e) { message(paste(" Error extracting CI for", field_id, ":", e$message)) current_ci_vals <<- c() }) if (length(current_ci_vals) == 0) { if (i <= 5) { # Debug first 5 fields only message(paste(" DEBUG: Field", field_id, "- extract returned empty, geometry may not overlap or all NA")) } next } # Calculate CI statistics mean_ci_current <- mean(current_ci_vals, na.rm = TRUE) cv_current <- sd(current_ci_vals, na.rm = TRUE) / mean_ci_current range_min <- min(current_ci_vals, na.rm = TRUE) range_max <- max(current_ci_vals, na.rm = TRUE) # Calculate weekly CI change weekly_ci_change <- NA range_str <- sprintf("%.1f-%.1f", range_min, range_max) if (!is.null(previous_ci)) { previous_ci_vals <- terra::extract(previous_ci, field_geom)[, 2] previous_ci_vals <- previous_ci_vals[!is.na(previous_ci_vals)] if (length(previous_ci_vals) > 0) { mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE) weekly_ci_change <- mean_ci_current - mean_ci_previous } } # Format CI change ci_std <- sd(current_ci_vals, na.rm = TRUE) if (is.na(weekly_ci_change)) { weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std) } else { direction <- if (weekly_ci_change > 0) "+" else "" weekly_ci_change_str <- sprintf("%s%.1f (%.1f ± %.2f)", direction, weekly_ci_change, mean_ci_current, ci_std) } # Get field age from most recent season_start in harvest.xlsx (relative to report_date) age_weeks <- NA if (!is.null(planting_dates)) { # Check for exact field match OR if field_id is NA (uniform age mode) planting_row <- which(tolower(planting_dates$field_id) == tolower(field_id) | is.na(planting_dates$field_id)) if (length(planting_row) > 0) { planting_date <- as.Date(planting_dates$planting_date[planting_row[1]]) age_weeks <- as.integer(difftime(report_date, planting_date, units = "weeks")) } } # Calculate germination progression (for germination phase only) pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100 pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100 germination_progress_str <- NA_character_ if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) { germination_progress_str <- sprintf("%.0f%% > 2, %.0f%% ≥ 2", pct_ci_above_2, pct_ci_ge_2) } # Assign phase and trigger # Priority: Check harvest imminence first (overrides age-based phase) phase <- "Unknown" imminent_prob_val <- NA if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { harvest_row <- which(harvest_imminence_data$field_id == field_id) if (length(harvest_row) > 0) { imminent_prob_val <- harvest_imminence_data$imminent_prob[harvest_row[1]] if (harvest_imminence_data$harvest_imminent[harvest_row[1]]) { phase <- "Harvest Imminent" } } } # If not harvest imminent, use age-based phase if (phase == "Unknown") { phase <- get_phase_by_age(age_weeks) } status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks) # Track phase transitions nmr_weeks_in_phase <- 1 if (!is.null(previous_week_csv)) { prev_row <- which(previous_week_csv$Field_id == field_id) if (length(prev_row) > 0) { prev_phase <- previous_week_csv$`Phase (age based)`[prev_row[1]] prev_weeks <- as.numeric(previous_week_csv$nmr_weeks_in_this_phase[prev_row[1]]) if (!is.na(prev_phase) && prev_phase == phase) { nmr_weeks_in_phase <- prev_weeks + if (!is.na(weeks_lookback) && weeks_lookback > 1) weeks_lookback else 1 } else { nmr_weeks_in_phase <- if (!is.na(weeks_lookback) && weeks_lookback > 1) weeks_lookback else 1 } } } field_count <- field_count + 1 # Get cloud data for this field if available cloud_pct <- NA cloud_cat <- NA if (!is.null(cloud_data) && nrow(cloud_data) > 0) { cloud_row <- which(cloud_data$field_id == field_id) if (length(cloud_row) > 0) { cloud_pct <- cloud_data$pct_clear[cloud_row[1]] cloud_cat <- cloud_data$cloud_category[cloud_row[1]] } } field_analysis[[field_count]] <- data.frame( Field_id = field_id, Farm_Section = farm_section, Field_name = field_name, Acreage = round(field_area_acres, 2), Mean_CI = round(mean_ci_current, 2), Weekly_ci_change = weekly_ci_change_str, Germination_progress = germination_progress_str, Age_week = age_weeks, `Phase (age based)` = phase, nmr_weeks_in_this_phase = nmr_weeks_in_phase, Imminent_prob = imminent_prob_val, Status_trigger = status_trigger, CI_range = range_str, CV = round(cv_current, 3), Cloud_pct_clear = cloud_pct, Cloud_category = cloud_cat, stringsAsFactors = FALSE, check.names = FALSE ) } if (length(field_analysis) == 0) { message("ERROR: No fields processed!") return(data.frame()) } field_df <- do.call(rbind, field_analysis) rownames(field_df) <- NULL message(paste("Per-field analysis completed for", nrow(field_df), "fields")) return(field_df) } generate_field_analysis_summary <- function(field_df) { message("Generating summary statistics...") # Total acreage (FIRST - needed for all percentages) total_acreage <- sum(field_df$Acreage, na.rm = TRUE) # Phase breakdown germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE) tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE) grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE) maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE) unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE) # Status trigger breakdown harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) # Cloud coverage breakdown clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) monitored_acreage <- clear_acreage + partial_acreage + no_image_acreage # Total monitored # Count fields by cloud category clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) total_fields <- nrow(field_df) # Create summary as a proper table with clear structure summary_df <- data.frame( Category = c( "MONITORED AREA", "Total Monitored Acreage", "", "CLOUD COVERAGE", "Clear view (# fields)", "Clear view (acres)", "Partial coverage (# fields)", "Partial coverage (acres)", "No image available (# fields)", "No image available (acres)", "", "PHASE DISTRIBUTION", "Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase", "", "STATUS TRIGGERS", "Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected", "Germination Complete", "Germination Started", "No Active Trigger", "", "TOTAL FARM", "Total Acreage" ), Acreage = c( NA, round(monitored_acreage, 2), NA, NA, paste0(clear_fields, " fields"), round(clear_acreage, 2), paste0(partial_fields, " fields"), round(partial_acreage, 2), paste0(no_image_fields, " fields"), round(no_image_acreage, 2), NA, NA, round(germination_acreage, 2), round(tillering_acreage, 2), round(grand_growth_acreage, 2), round(maturation_acreage, 2), round(unknown_phase_acreage, 2), NA, NA, round(harvest_ready_acreage, 2), round(recovery_acreage, 2), round(growth_on_track_acreage, 2), round(stress_acreage, 2), round(germination_complete_acreage, 2), round(germination_started_acreage, 2), round(no_trigger_acreage, 2), NA, NA, round(total_acreage, 2) ), stringsAsFactors = FALSE ) # Add metadata as attributes for use in report attr(summary_df, "cloud_fields_clear") <- clear_fields attr(summary_df, "cloud_fields_partial") <- partial_fields attr(summary_df, "cloud_fields_no_image") <- no_image_fields attr(summary_df, "cloud_fields_total") <- total_fields return(summary_df) } export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { message("Exporting per-field analysis to Excel and RDS...") # Save to kpis/field_analysis subfolder output_subdir <- file.path(reports_dir, "kpis", "field_analysis") if (!dir.exists(output_subdir)) { dir.create(output_subdir, recursive = TRUE) } # Create Excel with two sheets: Field Data and Summary excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx") excel_path <- file.path(output_subdir, excel_filename) # Normalize path for Windows excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) # Prepare sheets as a list sheets <- list( "Field Data" = field_df, "Summary" = summary_df ) # Export to Excel write_xlsx(sheets, excel_path) message(paste("✓ Field analysis Excel exported to:", excel_path)) # Also save as RDS for 91_CI_report_with_kpis_Angata.Rmd to consume # RDS format: list with field_analysis and field_analysis_summary for compatibility kpi_data <- list( field_analysis = field_df, field_analysis_summary = summary_df, metadata = list( project = project_dir, current_week = current_week, export_time = Sys.time() ) ) rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") rds_path <- file.path(reports_dir, "kpis", rds_filename) saveRDS(kpi_data, rds_path) message(paste("✓ Field analysis RDS exported to:", rds_path)) return(list(excel = excel_path, rds = rds_path)) } export_harvest_predictions <- function(harvest_data, project_dir, current_week, reports_dir) { message("Exporting weekly harvest predictions...") if (is.null(harvest_data) || nrow(harvest_data) == 0) { message("No harvest predictions to export") return(NULL) } # Save to kpis/harvest_predictions subfolder output_subdir <- file.path(reports_dir, "kpis", "harvest_predictions") if (!dir.exists(output_subdir)) { dir.create(output_subdir, recursive = TRUE) } # Create Excel with harvest predictions excel_filename <- paste0(project_dir, "_harvest_predictions_week", sprintf("%02d", current_week), ".xlsx") excel_path <- file.path(output_subdir, excel_filename) # Normalize path for Windows excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) # Prepare data for export export_df <- harvest_data %>% select(field_id, imminent_prob, harvest_imminent) %>% mutate(harvest_imminent = ifelse(harvest_imminent, "Yes", "No")) # Export to Excel write_xlsx(list("Predictions" = export_df), excel_path) message(paste("✓ Weekly harvest predictions exported to:", excel_path)) return(excel_path) } # ============================================================================ # MAIN # ============================================================================ main <- function() { args <- commandArgs(trailingOnly = TRUE) end_date <- if (length(args) >= 1 && !is.na(args[1])) { as.Date(args[1]) } else if (exists("end_date_str", envir = .GlobalEnv)) { as.Date(get("end_date_str", envir = .GlobalEnv)) } else { Sys.Date() } offset <- if (length(args) >= 2 && !is.na(args[2])) { as.numeric(args[2]) } else if (exists("offset", envir = .GlobalEnv)) { get("offset", envir = .GlobalEnv) } else { 7 } project_dir <- if (length(args) >= 3 && !is.na(args[3])) { as.character(args[3]) } else if (exists("project_dir", envir = .GlobalEnv)) { get("project_dir", envir = .GlobalEnv) } else { "angata" } assign("project_dir", project_dir, envir = .GlobalEnv) # Load utilities and configuration source(here("r_app", "crop_messaging_utils.R")) source(here("r_app", "parameters_project.R")) message("=== FIELD ANALYSIS WEEKLY ===") message(paste("Date:", end_date)) message(paste("Project:", project_dir)) # Calculate week numbers weeks <- list( current_week = as.numeric(format(end_date, "%V")), previous_week = as.numeric(format(end_date, "%V")) - 1, year = as.numeric(format(end_date, "%Y")) ) if (weeks$previous_week < 1) weeks$previous_week <- 52 message(paste("Week:", weeks$current_week, "/ Year:", weeks$year)) # Load CI mosaics load_weekly_ci <- function(week_num, year, mosaic_dir) { week_file <- sprintf("week_%02d_%d.tif", week_num, year) week_path <- file.path(mosaic_dir, week_file) if (!file.exists(week_path)) { message(paste(" Mosaic not found:", week_file)) return(NULL) } tryCatch({ mosaic_raster <- terra::rast(week_path) message(paste(" Mosaic bands:", terra::nlyr(mosaic_raster), "layers")) # Check for named CI band, otherwise extract band 5 (CI is typically the 5th band) if ("CI" %in% names(mosaic_raster)) { ci_raster <- mosaic_raster[["CI"]] message(paste(" Extracted named CI band")) } else if (terra::nlyr(mosaic_raster) >= 5) { # Mosaic has 5+ bands: R, G, B, NIR, CI - extract band 5 ci_raster <- mosaic_raster[[5]] message(paste(" Extracted band 5 (CI) from multi-band mosaic")) } else { # Fallback to band 1 if fewer than 5 bands ci_raster <- mosaic_raster[[1]] message(paste(" Using band 1 (only", terra::nlyr(mosaic_raster), "bands available)")) } names(ci_raster) <- "CI" message(paste(" ✓ Loaded:", week_file)) return(ci_raster) }, error = function(e) { message(paste(" Error loading:", e$message)) return(NULL) }) } message("Loading CI mosaics...") current_ci <- load_weekly_ci(weeks$current_week, weeks$year, weekly_CI_mosaic) previous_ci <- load_weekly_ci(weeks$previous_week, weeks$year, weekly_CI_mosaic) if (is.null(current_ci)) { stop("Current week CI mosaic is required but not found") } # Load historical data previous_week_result <- load_previous_week_csv(project_dir, weeks$current_week, reports_dir) planting_dates <- extract_planting_dates(harvesting_data) # ============================================================================ # HARVEST IMMINENCE MODEL - DISABLED FOR UNIFORM AGE MODE # TO SWITCH BACK: Set SKIP_HARVEST_MODEL <- FALSE below and uncomment loading code # ============================================================================ SKIP_HARVEST_MODEL <- TRUE harvest_imminence_data <- NULL if (!SKIP_HARVEST_MODEL) { message("Loading harvest imminence model...") # Try multiple possible locations for the harvest model harvest_model_candidates <- c( here("python_app", "harvest_detection_experiments", "experiment_framework", "04_production_export"), here("04_production_export"), here("..", "python_app", "harvest_detection_experiments", "experiment_framework", "04_production_export") ) harvest_model_dir <- NULL for (candidate in harvest_model_candidates) { if (dir.exists(candidate)) { harvest_model_dir <- candidate message(paste("Found harvest model at:", harvest_model_dir)) break } } if (!is.null(harvest_model_dir)) { # Load CI data for model (from cumulative RDS) ci_rds_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds") if (file.exists(ci_rds_path)) { ci_data <- readRDS(ci_rds_path) harvest_imminence_data <- calculate_harvest_imminence(ci_data, field_boundaries_sf, harvest_model_dir, imminent_threshold = 0.5) } else { message("Note: CI data not found - harvest imminence detection will be skipped") } } else { message("Note: Harvest model directory not found - harvest imminence detection will be skipped") } } else { message("Harvest imminence model skipped (SKIP_HARVEST_MODEL = TRUE)") } # Calculate cloud coverage from weekly mosaic (more accurate than 8-band data) cloud_data <- NULL if (!is.null(current_ci)) { cloud_data <- calculate_cloud_coverage_from_mosaic(current_ci, field_boundaries_sf) # Export cloud coverage data to RDS for use in downstream reporting scripts if (!is.null(cloud_data) && nrow(cloud_data) > 0) { cloud_data_file <- file.path(reports_dir, paste0(project_dir, "_cloud_coverage_week", weeks$current_week, ".rds")) saveRDS(cloud_data, cloud_data_file) message(paste("Cloud coverage data exported to:", cloud_data_file)) } } else { message("Note: Current mosaic not available - cloud coverage analysis will be skipped") } # Calculate analysis field_analysis_df <- calculate_per_field_analysis( current_ci, previous_ci, field_boundaries_sf, previous_week_result, planting_dates, end_date, cloud_data, harvest_imminence_data ) if (nrow(field_analysis_df) == 0) { stop("No fields were analyzed successfully") } summary_statistics_df <- generate_field_analysis_summary(field_analysis_df) export_paths <- export_field_analysis_excel( field_analysis_df, summary_statistics_df, project_dir, weeks$current_week, reports_dir ) # Export weekly harvest predictions if available if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { harvest_export_path <- export_harvest_predictions(harvest_imminence_data, project_dir, weeks$current_week, reports_dir) } # Print summary cat("\n=== FIELD ANALYSIS SUMMARY ===\n") cat("Fields analyzed:", nrow(field_analysis_df), "\n") cat("Excel export:", export_paths$excel, "\n") cat("RDS export:", export_paths$rds, "\n") if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { cat("Harvest predictions export:", harvest_export_path, "\n") } cat("\n") cat("--- Per-field results (first 10) ---\n") print(head(field_analysis_df[, c("Field_id", "Acreage", "Age_week", "Phase (age based)", "Germination_progress", "Status_trigger", "Weekly_ci_change")], 10)) cat("\n--- Summary statistics ---\n") print(summary_statistics_df) } if (sys.nframe() == 0) { main() }