SmartCane/r_app/old_scripts/09_field_analysis_weekly.R

1113 lines
40 KiB
R

# 09_FIELD_ANALYSIS_WEEKLY.R
# ==========================
# Per-field weekly analysis with phase detection and status triggers
# Generates detailed field-level CSV export with:
# - Field identifiers and areas
# - Weekly CI change (mean ± std)
# - Age-based phase assignment (Germination, Tillering, Grand Growth, Maturation)
# - Harvest imminence detection (Phase 1 from LSTM model)
# - Status triggers (non-exclusive, can coexist with harvest imminent phase)
# - Phase transition tracking (weeks in current phase)
# - Cloud coverage analysis from 8-band satellite data (band 9 = cloud mask)
#
# Harvest Imminence:
# - Runs LSTM Phase 1 detection to get imminent_prob (probability harvest in next ~4 weeks)
# - If imminent_prob > 0.5, phase = "Harvest Imminent" (overrides age-based phase)
# - Weekly predictions exported to separate Excel for tracking
#
# Cloud Coverage Categories (from band 9: 1=clear, 0=cloudy):
# - Clear view: >=99.5% clear pixels (100% practical coverage)
# - Partial coverage: 0-99.5% clear pixels (some cloud interference)
# - No image available: 0% clear pixels (completely clouded)
#
# Output:
# - Excel (.xlsx) with Field Data sheet and Summary sheet
# - Excel (.xlsx) weekly harvest predictions for tracking
# - RDS file with field_analysis and field_analysis_summary for Rmd reports
# - Summary includes: Monitored area, Cloud coverage, Phase distribution, Status triggers
#
# Usage: Rscript 09_field_analysis_weekly.R [end_date] [offset] [project_dir]
# - end_date: End date for analysis (YYYY-MM-DD format), default: today
# - offset: Number of days to look back (for consistency, not currently used)
# - project_dir: Project directory name (e.g., "aura", "esa", "angata")
# 1. Load required libraries
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(readxl)
library(writexl)
# Optional: torch for harvest model inference (will skip if not available)
tryCatch({
library(torch)
}, error = function(e) {
message("Note: torch package not available - harvest model inference will be skipped")
})
})
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================
PHASE_DEFINITIONS <- data.frame(
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
age_start = c(0, 4, 17, 39),
age_end = c(6, 16, 39, 200),
stringsAsFactors = FALSE
)
STATUS_TRIGGERS <- data.frame(
trigger = c(
"germination_started",
"germination_complete",
"stress_detected_whole_field",
"strong_recovery",
"growth_on_track",
"maturation_progressing",
"harvest_ready"
),
age_min = c(0, 0, NA, NA, 4, 39, 45),
age_max = c(6, 6, NA, NA, 39, 200, 200),
description = c(
"10% of field CI > 2",
"70% of field CI >= 2",
"CI decline > -1.5 + low CV",
"CI increase > +1.5",
"CI increasing consistently",
"High CI, stable/declining",
"Age 45+ weeks (ready to harvest)"
),
stringsAsFactors = FALSE
)
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
get_phase_by_age <- function(age_weeks) {
if (is.na(age_weeks)) return(NA_character_)
for (i in seq_len(nrow(PHASE_DEFINITIONS))) {
if (age_weeks >= PHASE_DEFINITIONS$age_start[i] &&
age_weeks <= PHASE_DEFINITIONS$age_end[i]) {
return(PHASE_DEFINITIONS$phase[i])
}
}
return("Unknown")
}
get_status_trigger <- function(ci_values, ci_change, age_weeks) {
if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)
ci_values <- ci_values[!is.na(ci_values)]
if (length(ci_values) == 0) return(NA_character_)
pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
mean_ci <- mean(ci_values, na.rm = TRUE)
# Germination phase triggers (age 0-6)
if (age_weeks >= 0 && age_weeks <= 6) {
if (pct_at_or_above_2 >= 70) {
return("germination_complete")
} else if (pct_above_2 > 10) {
return("germination_started")
}
}
# Harvest ready (45+ weeks) - check first to prioritize
if (age_weeks >= 45) {
return("harvest_ready")
}
# Stress detection (any phase except Germination)
if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
return("stress_detected_whole_field")
}
# Strong recovery (any phase except Germination)
if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
return("strong_recovery")
}
# Growth on track (Tillering/Grand Growth, 4-39 weeks)
if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
return("growth_on_track")
}
# Maturation progressing (39-45 weeks, high CI stable/declining)
if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
return("maturation_progressing")
}
return(NA_character_)
}
load_previous_week_csv <- function(project_dir, current_week, reports_dir) {
lookback_weeks <- c(1, 2, 3)
for (lookback in lookback_weeks) {
previous_week <- current_week - lookback
if (previous_week < 1) previous_week <- previous_week + 52
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", previous_week), ".csv")
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
if (file.exists(csv_path)) {
tryCatch({
prev_data <- read.csv(csv_path, stringsAsFactors = FALSE)
message(if (lookback == 1)
paste("Loaded previous week CSV (week", previous_week, ")")
else
paste("Loaded fallback CSV from", lookback, "weeks ago (week", previous_week, ")"))
return(list(data = prev_data, weeks_lookback = lookback, found = TRUE))
}, error = function(e) {
message(paste("Warning: Could not read CSV from week", previous_week))
})
}
}
message("No previous field analysis CSV found. Phase tracking will be age-based only.")
return(list(data = NULL, weeks_lookback = NA, found = FALSE))
}
# ============================================================================
# UNIFORM AGE MODE - Using fixed planting date for all fields
# TO SWITCH BACK: Set USE_UNIFORM_AGE <- FALSE and uncomment original code
# ============================================================================
USE_UNIFORM_AGE <- TRUE
UNIFORM_PLANTING_DATE <- as.Date("2025-01-01") # All fields planted this date
extract_planting_dates <- function(harvesting_data) {
if (USE_UNIFORM_AGE) {
message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
return(data.frame(
field_id = NA, # NA = apply to all fields
planting_date = UNIFORM_PLANTING_DATE,
stringsAsFactors = FALSE
))
}
# ORIGINAL CODE (commented out - uncomment when switching back to real harvest data):
# if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
# message("Warning: No harvesting data available.")
# return(NULL)
# }
#
# tryCatch({
# # Get the MOST RECENT season_start for each field (most recent row per field)
# planting_dates <- harvesting_data %>%
# arrange(field, desc(season_start)) %>%
# distinct(field, .keep_all = TRUE) %>%
# select(field, season_start) %>%
# rename(field_id = field, planting_date = season_start) %>%
# filter(!is.na(planting_date)) %>%
# as.data.frame()
#
# message(paste("Extracted planting dates for", nrow(planting_dates), "fields (most recent season)"))
# return(planting_dates)
# }, error = function(e) {
# message(paste("Error extracting planting dates:", e$message))
# return(NULL)
# })
}
# ============================================================================
# CLOUD ANALYSIS FUNCTION
# ============================================================================
calculate_field_cloud_coverage <- function(field_boundaries_sf, merged_tif_8b_dir, current_week, current_year) {
message("Calculating per-field cloud coverage from weekly cloud mosaic...")
# Check if cloud data directory exists
if (!dir.exists(merged_tif_8b_dir)) {
message(paste("Warning: Cloud data directory not found:", merged_tif_8b_dir))
return(NULL)
}
# Get all TIF files (format: YYYY-MM-DD.tif)
tif_files <- list.files(merged_tif_8b_dir, pattern = "\\.tif$", full.names = TRUE)
if (length(tif_files) == 0) {
message("Warning: No 8-band TIF files found in cloud data directory")
return(NULL)
}
# Extract dates from filenames and filter for current week
file_dates <- as.Date(gsub("\\.tif$", "", basename(tif_files)))
current_week_date <- as.Date(paste(current_year, "01-01", sep = "-")) + (current_week - 1) * 7
week_start <- current_week_date - as.numeric(format(current_week_date, "%w"))
week_end <- week_start + 6
week_files <- tif_files[file_dates >= week_start & file_dates <= week_end]
if (length(week_files) == 0) {
message(paste("Warning: No 8-band files found for week", current_week))
return(NULL)
}
message(paste("Found", length(week_files), "cloud mask files for week", current_week))
# Step 1: Create weekly cloud mosaic using max aggregation (same as CI mosaic)
message("Creating weekly cloud mosaic using max aggregation...")
cloud_rasters <- list()
for (i in seq_along(week_files)) {
tryCatch({
r <- terra::rast(week_files[i])
# Band 9 is the cloud mask (1 = clear, 0 = cloudy)
if (nlyr(r) < 9) {
message(paste(" Warning: File has only", nlyr(r), "bands, need 9 (band 9 = cloud mask)"))
next
}
cloud_band <- r[[9]]
cloud_rasters[[i]] <- cloud_band
}, error = function(e) {
message(paste(" Error processing", basename(week_files[i]), ":", e$message))
})
}
if (length(cloud_rasters) == 0) {
message("Warning: No valid cloud mask data found")
return(NULL)
}
# Create mosaic: use max value to prefer clear pixels (1 = clear, 0 = cloudy)
if (length(cloud_rasters) == 1) {
cloud_mosaic <- cloud_rasters[[1]]
} else {
# Filter out NULL entries
cloud_rasters <- cloud_rasters[!sapply(cloud_rasters, is.null)]
rsrc <- terra::sprc(cloud_rasters)
cloud_mosaic <- terra::mosaic(rsrc, fun = "max")
}
message("Weekly cloud mosaic created")
# Step 2: Extract cloud coverage per field from the mosaic
message("Extracting cloud coverage per field...")
cloud_summary <- data.frame(
field_id = character(),
pct_clear = numeric(),
cloud_category = character(),
stringsAsFactors = FALSE
)
for (i in seq_len(nrow(field_boundaries_sf))) {
tryCatch({
field_id <- field_boundaries_sf$field[i]
field_geom <- terra::vect(sf::as_Spatial(field_boundaries_sf[i, ]))
# Extract cloud values from mosaic (1 = clear, 0 = cloudy)
cloud_vals <- terra::extract(cloud_mosaic, field_geom)[, 2]
cloud_vals <- cloud_vals[!is.na(cloud_vals)]
if (length(cloud_vals) > 0) {
# Calculate % clear (value 1 = clear, 0 = cloudy)
pct_clear <- (sum(cloud_vals == 1) / length(cloud_vals)) * 100
# Categorize: 100% = Clear, 0-99.9% = Partial, 0% = No Image
if (pct_clear >= 99.5) {
category <- "Clear view"
} else if (pct_clear > 0 && pct_clear < 99.5) {
category <- "Partial coverage"
} else {
category <- "No image available"
}
cloud_summary <- rbind(cloud_summary, data.frame(
field_id = field_id,
pct_clear = round(pct_clear, 1),
cloud_category = category,
stringsAsFactors = FALSE
))
}
}, error = function(e) {
message(paste(" Error extracting cloud data for field", field_id, ":", e$message))
})
}
message(paste("Cloud analysis complete for", nrow(cloud_summary), "fields"))
return(cloud_summary)
}
#' Calculate per-field cloud coverage from weekly mosaic
#'
#' @param mosaic_raster The weekly CI mosaic raster (already cloud-masked)
#' @param field_boundaries_sf Field boundaries as sf object
#' @return Data frame with per-field cloud coverage (clear acres, % clear, category)
#'
calculate_cloud_coverage_from_mosaic <- function(mosaic_raster, field_boundaries_sf) {
message("Calculating per-field cloud coverage from weekly mosaic...")
if (is.null(mosaic_raster) || class(mosaic_raster)[1] != "SpatRaster") {
message("Warning: Invalid mosaic raster provided")
return(NULL)
}
# Extract CI band (last band)
ci_band <- mosaic_raster[[nlyr(mosaic_raster)]]
# Ensure CRS matches
if (!terra::same.crs(ci_band, field_boundaries_sf)) {
field_boundaries_sf <- sf::st_transform(field_boundaries_sf, sf::st_crs(ci_band))
}
cloud_data <- data.frame(
field_id = character(),
sub_field = character(),
clear_pixels = numeric(),
total_pixels = numeric(),
missing_pixels = numeric(),
pct_clear = numeric(),
cloud_category = character(),
stringsAsFactors = FALSE
)
# Process each field
for (i in seq_len(nrow(field_boundaries_sf))) {
field_id <- field_boundaries_sf$field[i]
sub_field <- field_boundaries_sf$sub_field[i]
# Extract pixel values from field using terra::extract (memory-efficient, native terra)
extracted_vals <- tryCatch({
result <- terra::extract(ci_band, field_boundaries_sf[i, ], cells = TRUE, na.rm = FALSE)
# terra::extract returns a data.frame with ID and value columns; extract values only
if (is.data.frame(result) && nrow(result) > 0) {
data.frame(value = result$value, cells = result$cell)
} else {
NULL
}
}, error = function(e) {
NULL
})
# Skip if extraction failed or returned empty
if (is.null(extracted_vals) || !is.data.frame(extracted_vals) || nrow(extracted_vals) == 0) {
next
}
# Count pixels: data vs missing
ci_vals <- extracted_vals$value
num_data <- sum(!is.na(ci_vals))
num_total <- length(ci_vals)
num_missing <- num_total - num_data
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
# Categorize
category <- if (pct_clear >= 85) "Clear view"
else if (pct_clear > 0) "Partial coverage"
else "No image available"
# Store result
cloud_data <- rbind(cloud_data, data.frame(
field_id = field_id,
sub_field = sub_field,
clear_pixels = num_data,
total_pixels = num_total,
missing_pixels = num_missing,
pct_clear = pct_clear,
cloud_category = category,
stringsAsFactors = FALSE
))
}
message(paste("Per-field cloud analysis complete for", nrow(cloud_data), "fields"))
return(cloud_data)
}
# ============================================================================
# HARVEST IMMINENCE DETECTION FUNCTION
# ============================================================================
calculate_harvest_imminence <- function(ci_data, field_boundaries_sf, harvest_model_dir,
imminent_threshold = 0.5) {
message("Calculating harvest imminence probabilities...")
# Check if model files exist
model_path <- file.path(harvest_model_dir, "model.pt")
config_path <- file.path(harvest_model_dir, "config.json")
scalers_path <- file.path(harvest_model_dir, "scalers.pkl")
if (!file.exists(model_path) || !file.exists(config_path) || !file.exists(scalers_path)) {
message("Warning: Harvest model files not found in", harvest_model_dir)
return(NULL)
}
tryCatch({
# Load model using harvest_date_pred_utils
# Note: This requires torch package and Python utilities, skip if not available
harvest_utils_path <- file.path(harvest_model_dir, "harvest_date_pred_utils.R")
if (!file.exists(harvest_utils_path)) {
message("Note: Harvest utils R file not found, using age-based trigger only")
return(NULL)
}
source(harvest_utils_path)
# Load model, config, and scalers
model <- torch::torch_load(model_path)
model$eval()
config <- jsonlite::read_json(config_path)
scalers <- reticulate::py_load_object(scalers_path)
# Run Phase 1 detection for each field
harvest_results <- data.frame(
field_id = character(),
imminent_prob = numeric(),
harvest_imminent = logical(),
stringsAsFactors = FALSE
)
for (field in unique(ci_data$field)) {
tryCatch({
field_data <- ci_data %>%
filter(field == !!field) %>%
arrange(Date)
if (nrow(field_data) < 10) {
message(paste(" Skipping field", field, "- insufficient CI data"))
next
}
# Extract CI values
ci_vals <- field_data$value
# Run Phase 1 (growing window detection with imminent probability)
phase1_result <- run_phase1_growing_window(
ci_values = ci_vals,
scalers = scalers,
config = config,
model = model
)
if (!is.null(phase1_result)) {
imminent_prob <- phase1_result$imminent_prob
harvest_imminent <- imminent_prob > imminent_threshold
harvest_results <- rbind(harvest_results, data.frame(
field_id = field,
imminent_prob = round(imminent_prob, 3),
harvest_imminent = harvest_imminent,
stringsAsFactors = FALSE
))
}
}, error = function(e) {
message(paste(" Error processing field", field, ":", e$message))
})
}
message(paste("Harvest imminence detection complete for", nrow(harvest_results), "fields"))
return(harvest_results)
}, error = function(e) {
message(paste("Error in harvest imminence calculation:", e$message))
return(NULL)
})
}
# ============================================================================
# MAIN CALCULATION FUNCTION
# ============================================================================
calculate_per_field_analysis <- function(current_ci, previous_ci, field_boundaries_sf,
previous_week_result = NULL, planting_dates = NULL, report_date = Sys.Date(),
cloud_data = NULL, harvest_imminence_data = NULL) {
message("Calculating per-field analysis...")
# Debug CRS info
message(paste(" Current CI CRS:", terra::crs(current_ci)))
message(paste(" Field boundaries CRS:", sf::st_crs(field_boundaries_sf)$input))
# Debug mosaic info
ci_ext <- terra::ext(current_ci)
ci_summary <- terra::global(current_ci, "range", na.rm = TRUE)
message(paste(" Mosaic extent:", paste(round(c(ci_ext$xmin, ci_ext$xmax, ci_ext$ymin, ci_ext$ymax), 4), collapse=", ")))
message(paste(" Mosaic value range:", paste(round(as.numeric(ci_summary), 4), collapse=" to ")))
previous_week_csv <- NULL
weeks_lookback <- NA
if (!is.null(previous_week_result) && previous_week_result$found) {
previous_week_csv <- previous_week_result$data
weeks_lookback <- previous_week_result$weeks_lookback
}
field_analysis <- list()
field_count <- 0
for (i in seq_len(nrow(field_boundaries_sf))) {
field_id <- field_boundaries_sf$field[i]
farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) {
field_boundaries_sf$sub_area[i]
} else {
NA_character_
}
field_name <- field_boundaries_sf$field[i]
# Check if geometry is valid and non-empty
field_sf <- field_boundaries_sf[i, ]
if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
message(paste(" Skipping", field_id, "- invalid or empty geometry"))
next
}
# Calculate field area from geometry (in hectares)
tryCatch({
field_geom_calc <- terra::vect(sf::as_Spatial(field_sf))
terra::crs(field_geom_calc) <- terra::crs(current_ci) # Set CRS to match
field_area_ha <- terra::expanse(field_geom_calc) / 10000 # Convert m² to hectares
field_area_acres <- field_area_ha / 0.404686 # Convert to acres
}, error = function(e) {
message(paste(" Skipping", field_id, "- error calculating geometry:", e$message))
next
})
# Extract current CI values with CRS validation
tryCatch({
field_geom <- terra::vect(sf::as_Spatial(field_sf))
# Set CRS explicitly to match raster (as_Spatial loses CRS info)
terra::crs(field_geom) <- terra::crs(current_ci)
extract_result <- terra::extract(current_ci, field_geom)
current_ci_vals <- extract_result[, 2]
current_ci_vals <- current_ci_vals[!is.na(current_ci_vals)]
}, error = function(e) {
message(paste(" Error extracting CI for", field_id, ":", e$message))
current_ci_vals <<- c()
})
if (length(current_ci_vals) == 0) {
if (i <= 5) { # Debug first 5 fields only
message(paste(" DEBUG: Field", field_id, "- extract returned empty, geometry may not overlap or all NA"))
}
next
}
# Calculate CI statistics
mean_ci_current <- mean(current_ci_vals, na.rm = TRUE)
cv_current <- sd(current_ci_vals, na.rm = TRUE) / mean_ci_current
range_min <- min(current_ci_vals, na.rm = TRUE)
range_max <- max(current_ci_vals, na.rm = TRUE)
# Calculate weekly CI change
weekly_ci_change <- NA
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
if (!is.null(previous_ci)) {
previous_ci_vals <- terra::extract(previous_ci, field_geom)[, 2]
previous_ci_vals <- previous_ci_vals[!is.na(previous_ci_vals)]
if (length(previous_ci_vals) > 0) {
mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
weekly_ci_change <- mean_ci_current - mean_ci_previous
}
}
# Format CI change
ci_std <- sd(current_ci_vals, na.rm = TRUE)
if (is.na(weekly_ci_change)) {
weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std)
} else {
direction <- if (weekly_ci_change > 0) "+" else ""
weekly_ci_change_str <- sprintf("%s%.1f (%.1f ± %.2f)", direction, weekly_ci_change, mean_ci_current, ci_std)
}
# Get field age from most recent season_start in harvest.xlsx (relative to report_date)
age_weeks <- NA
if (!is.null(planting_dates)) {
# Check for exact field match OR if field_id is NA (uniform age mode)
planting_row <- which(tolower(planting_dates$field_id) == tolower(field_id) | is.na(planting_dates$field_id))
if (length(planting_row) > 0) {
planting_date <- as.Date(planting_dates$planting_date[planting_row[1]])
age_weeks <- as.integer(difftime(report_date, planting_date, units = "weeks"))
}
}
# Calculate germination progression (for germination phase only)
pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100
pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100
germination_progress_str <- NA_character_
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) {
germination_progress_str <- sprintf("%.0f%% > 2, %.0f%% ≥ 2", pct_ci_above_2, pct_ci_ge_2)
}
# Assign phase and trigger
# Priority: Check harvest imminence first (overrides age-based phase)
phase <- "Unknown"
imminent_prob_val <- NA
if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
harvest_row <- which(harvest_imminence_data$field_id == field_id)
if (length(harvest_row) > 0) {
imminent_prob_val <- harvest_imminence_data$imminent_prob[harvest_row[1]]
if (harvest_imminence_data$harvest_imminent[harvest_row[1]]) {
phase <- "Harvest Imminent"
}
}
}
# If not harvest imminent, use age-based phase
if (phase == "Unknown") {
phase <- get_phase_by_age(age_weeks)
}
status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks)
# Track phase transitions
nmr_weeks_in_phase <- 1
if (!is.null(previous_week_csv)) {
prev_row <- which(previous_week_csv$Field_id == field_id)
if (length(prev_row) > 0) {
prev_phase <- previous_week_csv$`Phase (age based)`[prev_row[1]]
prev_weeks <- as.numeric(previous_week_csv$nmr_weeks_in_this_phase[prev_row[1]])
if (!is.na(prev_phase) && prev_phase == phase) {
nmr_weeks_in_phase <- prev_weeks + if (!is.na(weeks_lookback) && weeks_lookback > 1) weeks_lookback else 1
} else {
nmr_weeks_in_phase <- if (!is.na(weeks_lookback) && weeks_lookback > 1) weeks_lookback else 1
}
}
}
field_count <- field_count + 1
# Get cloud data for this field if available
cloud_pct <- NA
cloud_cat <- NA
if (!is.null(cloud_data) && nrow(cloud_data) > 0) {
cloud_row <- which(cloud_data$field_id == field_id)
if (length(cloud_row) > 0) {
cloud_pct <- cloud_data$pct_clear[cloud_row[1]]
cloud_cat <- cloud_data$cloud_category[cloud_row[1]]
}
}
field_analysis[[field_count]] <- data.frame(
Field_id = field_id,
Farm_Section = farm_section,
Field_name = field_name,
Acreage = round(field_area_acres, 2),
Mean_CI = round(mean_ci_current, 2),
Weekly_ci_change = weekly_ci_change_str,
Germination_progress = germination_progress_str,
Age_week = age_weeks,
`Phase (age based)` = phase,
nmr_weeks_in_this_phase = nmr_weeks_in_phase,
Imminent_prob = imminent_prob_val,
Status_trigger = status_trigger,
CI_range = range_str,
CV = round(cv_current, 3),
Cloud_pct_clear = cloud_pct,
Cloud_category = cloud_cat,
stringsAsFactors = FALSE,
check.names = FALSE
)
}
if (length(field_analysis) == 0) {
message("ERROR: No fields processed!")
return(data.frame())
}
field_df <- do.call(rbind, field_analysis)
rownames(field_df) <- NULL
message(paste("Per-field analysis completed for", nrow(field_df), "fields"))
return(field_df)
}
generate_field_analysis_summary <- function(field_df) {
message("Generating summary statistics...")
# Total acreage (FIRST - needed for all percentages)
total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
# Phase breakdown
germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE)
# Status trigger breakdown
harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
# Cloud coverage breakdown
clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
monitored_acreage <- clear_acreage + partial_acreage + no_image_acreage # Total monitored
# Count fields by cloud category
clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
total_fields <- nrow(field_df)
# Create summary as a proper table with clear structure
summary_df <- data.frame(
Category = c(
"MONITORED AREA",
"Total Monitored Acreage",
"",
"CLOUD COVERAGE",
"Clear view (# fields)",
"Clear view (acres)",
"Partial coverage (# fields)",
"Partial coverage (acres)",
"No image available (# fields)",
"No image available (acres)",
"",
"PHASE DISTRIBUTION",
"Germination",
"Tillering",
"Grand Growth",
"Maturation",
"Unknown Phase",
"",
"STATUS TRIGGERS",
"Harvest Ready",
"Strong Recovery",
"Growth On Track",
"Stress Detected",
"Germination Complete",
"Germination Started",
"No Active Trigger",
"",
"TOTAL FARM",
"Total Acreage"
),
Acreage = c(
NA,
round(monitored_acreage, 2),
NA,
NA,
paste0(clear_fields, " fields"),
round(clear_acreage, 2),
paste0(partial_fields, " fields"),
round(partial_acreage, 2),
paste0(no_image_fields, " fields"),
round(no_image_acreage, 2),
NA,
NA,
round(germination_acreage, 2),
round(tillering_acreage, 2),
round(grand_growth_acreage, 2),
round(maturation_acreage, 2),
round(unknown_phase_acreage, 2),
NA,
NA,
round(harvest_ready_acreage, 2),
round(recovery_acreage, 2),
round(growth_on_track_acreage, 2),
round(stress_acreage, 2),
round(germination_complete_acreage, 2),
round(germination_started_acreage, 2),
round(no_trigger_acreage, 2),
NA,
NA,
round(total_acreage, 2)
),
stringsAsFactors = FALSE
)
# Add metadata as attributes for use in report
attr(summary_df, "cloud_fields_clear") <- clear_fields
attr(summary_df, "cloud_fields_partial") <- partial_fields
attr(summary_df, "cloud_fields_no_image") <- no_image_fields
attr(summary_df, "cloud_fields_total") <- total_fields
return(summary_df)
}
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
message("Exporting per-field analysis to Excel and RDS...")
# Save to kpis/field_analysis subfolder
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE)
}
# Create Excel with two sheets: Field Data and Summary
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
# Normalize path for Windows
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
# Prepare sheets as a list
sheets <- list(
"Field Data" = field_df,
"Summary" = summary_df
)
# Export to Excel
write_xlsx(sheets, excel_path)
message(paste("✓ Field analysis Excel exported to:", excel_path))
# Also save as RDS for 91_CI_report_with_kpis_Angata.Rmd to consume
# RDS format: list with field_analysis and field_analysis_summary for compatibility
kpi_data <- list(
field_analysis = field_df,
field_analysis_summary = summary_df,
metadata = list(
project = project_dir,
current_week = current_week,
export_time = Sys.time()
)
)
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename)
saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path))
return(list(excel = excel_path, rds = rds_path))
}
export_harvest_predictions <- function(harvest_data, project_dir, current_week, reports_dir) {
message("Exporting weekly harvest predictions...")
if (is.null(harvest_data) || nrow(harvest_data) == 0) {
message("No harvest predictions to export")
return(NULL)
}
# Save to kpis/harvest_predictions subfolder
output_subdir <- file.path(reports_dir, "kpis", "harvest_predictions")
if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE)
}
# Create Excel with harvest predictions
excel_filename <- paste0(project_dir, "_harvest_predictions_week", sprintf("%02d", current_week), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
# Normalize path for Windows
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
# Prepare data for export
export_df <- harvest_data %>%
select(field_id, imminent_prob, harvest_imminent) %>%
mutate(harvest_imminent = ifelse(harvest_imminent, "Yes", "No"))
# Export to Excel
write_xlsx(list("Predictions" = export_df), excel_path)
message(paste("✓ Weekly harvest predictions exported to:", excel_path))
return(excel_path)
}
# ============================================================================
# MAIN
# ============================================================================
main <- function() {
args <- commandArgs(trailingOnly = TRUE)
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
offset <- if (length(args) >= 2 && !is.na(args[2])) {
as.numeric(args[2])
} else if (exists("offset", envir = .GlobalEnv)) {
get("offset", envir = .GlobalEnv)
} else {
7
}
project_dir <- if (length(args) >= 3 && !is.na(args[3])) {
as.character(args[3])
} else if (exists("project_dir", envir = .GlobalEnv)) {
get("project_dir", envir = .GlobalEnv)
} else {
"angata"
}
assign("project_dir", project_dir, envir = .GlobalEnv)
# Load utilities and configuration
source(here("r_app", "crop_messaging_utils.R"))
source(here("r_app", "parameters_project.R"))
message("=== FIELD ANALYSIS WEEKLY ===")
message(paste("Date:", end_date))
message(paste("Project:", project_dir))
# Calculate week numbers
weeks <- list(
current_week = as.numeric(format(end_date, "%V")),
previous_week = as.numeric(format(end_date, "%V")) - 1,
year = as.numeric(format(end_date, "%Y"))
)
if (weeks$previous_week < 1) weeks$previous_week <- 52
message(paste("Week:", weeks$current_week, "/ Year:", weeks$year))
# Load CI mosaics
load_weekly_ci <- function(week_num, year, mosaic_dir) {
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
week_path <- file.path(mosaic_dir, week_file)
if (!file.exists(week_path)) {
message(paste(" Mosaic not found:", week_file))
return(NULL)
}
tryCatch({
mosaic_raster <- terra::rast(week_path)
message(paste(" Mosaic bands:", terra::nlyr(mosaic_raster), "layers"))
# Check for named CI band, otherwise extract band 5 (CI is typically the 5th band)
if ("CI" %in% names(mosaic_raster)) {
ci_raster <- mosaic_raster[["CI"]]
message(paste(" Extracted named CI band"))
} else if (terra::nlyr(mosaic_raster) >= 5) {
# Mosaic has 5+ bands: R, G, B, NIR, CI - extract band 5
ci_raster <- mosaic_raster[[5]]
message(paste(" Extracted band 5 (CI) from multi-band mosaic"))
} else {
# Fallback to band 1 if fewer than 5 bands
ci_raster <- mosaic_raster[[1]]
message(paste(" Using band 1 (only", terra::nlyr(mosaic_raster), "bands available)"))
}
names(ci_raster) <- "CI"
message(paste(" ✓ Loaded:", week_file))
return(ci_raster)
}, error = function(e) {
message(paste(" Error loading:", e$message))
return(NULL)
})
}
message("Loading CI mosaics...")
current_ci <- load_weekly_ci(weeks$current_week, weeks$year, weekly_CI_mosaic)
previous_ci <- load_weekly_ci(weeks$previous_week, weeks$year, weekly_CI_mosaic)
if (is.null(current_ci)) {
stop("Current week CI mosaic is required but not found")
}
# Load historical data
previous_week_result <- load_previous_week_csv(project_dir, weeks$current_week, reports_dir)
planting_dates <- extract_planting_dates(harvesting_data)
# ============================================================================
# HARVEST IMMINENCE MODEL - DISABLED FOR UNIFORM AGE MODE
# TO SWITCH BACK: Set SKIP_HARVEST_MODEL <- FALSE below and uncomment loading code
# ============================================================================
SKIP_HARVEST_MODEL <- TRUE
harvest_imminence_data <- NULL
if (!SKIP_HARVEST_MODEL) {
message("Loading harvest imminence model...")
# Try multiple possible locations for the harvest model
harvest_model_candidates <- c(
here("python_app", "harvest_detection_experiments", "experiment_framework", "04_production_export"),
here("04_production_export"),
here("..", "python_app", "harvest_detection_experiments", "experiment_framework", "04_production_export")
)
harvest_model_dir <- NULL
for (candidate in harvest_model_candidates) {
if (dir.exists(candidate)) {
harvest_model_dir <- candidate
message(paste("Found harvest model at:", harvest_model_dir))
break
}
}
if (!is.null(harvest_model_dir)) {
# Load CI data for model (from cumulative RDS)
ci_rds_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds")
if (file.exists(ci_rds_path)) {
ci_data <- readRDS(ci_rds_path)
harvest_imminence_data <- calculate_harvest_imminence(ci_data, field_boundaries_sf,
harvest_model_dir, imminent_threshold = 0.5)
} else {
message("Note: CI data not found - harvest imminence detection will be skipped")
}
} else {
message("Note: Harvest model directory not found - harvest imminence detection will be skipped")
}
} else {
message("Harvest imminence model skipped (SKIP_HARVEST_MODEL = TRUE)")
}
# Calculate cloud coverage from weekly mosaic (more accurate than 8-band data)
cloud_data <- NULL
if (!is.null(current_ci)) {
cloud_data <- calculate_cloud_coverage_from_mosaic(current_ci, field_boundaries_sf)
# Export cloud coverage data to RDS for use in downstream reporting scripts
if (!is.null(cloud_data) && nrow(cloud_data) > 0) {
cloud_data_file <- file.path(reports_dir,
paste0(project_dir, "_cloud_coverage_week", weeks$current_week, ".rds"))
saveRDS(cloud_data, cloud_data_file)
message(paste("Cloud coverage data exported to:", cloud_data_file))
}
} else {
message("Note: Current mosaic not available - cloud coverage analysis will be skipped")
}
# Calculate analysis
field_analysis_df <- calculate_per_field_analysis(
current_ci, previous_ci, field_boundaries_sf,
previous_week_result, planting_dates, end_date, cloud_data, harvest_imminence_data
)
if (nrow(field_analysis_df) == 0) {
stop("No fields were analyzed successfully")
}
summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
export_paths <- export_field_analysis_excel(
field_analysis_df, summary_statistics_df, project_dir,
weeks$current_week, reports_dir
)
# Export weekly harvest predictions if available
if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
harvest_export_path <- export_harvest_predictions(harvest_imminence_data, project_dir,
weeks$current_week, reports_dir)
}
# Print summary
cat("\n=== FIELD ANALYSIS SUMMARY ===\n")
cat("Fields analyzed:", nrow(field_analysis_df), "\n")
cat("Excel export:", export_paths$excel, "\n")
cat("RDS export:", export_paths$rds, "\n")
if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
cat("Harvest predictions export:", harvest_export_path, "\n")
}
cat("\n")
cat("--- Per-field results (first 10) ---\n")
print(head(field_analysis_df[, c("Field_id", "Acreage", "Age_week", "Phase (age based)",
"Germination_progress", "Status_trigger", "Weekly_ci_change")], 10))
cat("\n--- Summary statistics ---\n")
print(summary_statistics_df)
}
if (sys.nframe() == 0) {
main()
}