added some more stats and functions calculate and export csv and rds file for future ref
This commit is contained in:
parent
4e94a9a78b
commit
6e88acef25
|
|
@ -65,11 +65,6 @@
|
|||
# [✓] Add Cloud_pct_clear (% from pixel coverage)
|
||||
# [✓] Column order: Reorder to match spec (1-21)
|
||||
#
|
||||
# PHASE 1 LIMITATION (Known Issue - To Fix in PHASE 2):
|
||||
# - Fields spanning multiple tiles currently show statistics from first intersecting tile only
|
||||
# - This results in p10 ≈ p90 (few pixels per tile) instead of field-wide percentiles
|
||||
# - FIX: After extracting all tiles, group by field_id and aggregate pixel values across all tiles
|
||||
# before calculating percentiles. This will give true field-wide statistics.
|
||||
#
|
||||
# PHASE 2 - MEDIUM (Requires computation):
|
||||
# [ ] Add nmr_weeks_in_this_phase (track phase transitions with previous week CSV)
|
||||
|
|
@ -539,6 +534,245 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
|
|||
})
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# MODULAR STATISTICS CALCULATION (Reusable for any week)
|
||||
# ============================================================================
|
||||
|
||||
calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
|
||||
mosaic_dir, report_date = Sys.Date()) {
|
||||
|
||||
message(paste("Calculating statistics for all fields - Week", week_num, year))
|
||||
|
||||
# Build tile file list
|
||||
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
|
||||
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
||||
|
||||
if (length(tile_files) == 0) {
|
||||
stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
|
||||
}
|
||||
|
||||
message(paste(" Found", length(tile_files), "tiles for week", week_num))
|
||||
|
||||
results_list <- list()
|
||||
fields_processed <- 0
|
||||
|
||||
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
||||
for (tile_idx in seq_along(tile_files)) {
|
||||
tile_file <- tile_files[tile_idx]
|
||||
|
||||
tryCatch({
|
||||
# Load tile
|
||||
current_rast <- terra::rast(tile_file)
|
||||
ci_band <- current_rast[["CI"]]
|
||||
|
||||
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
|
||||
message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found"))
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
# Extract all fields from this tile in ONE call
|
||||
# terra::extract returns a dataframe with columns: ID, CI
|
||||
# where each row is one pixel, and ID indicates which polygon it came from
|
||||
extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE)
|
||||
|
||||
# Group by field ID and calculate statistics for each field
|
||||
# extracted$ID contains the field polygon index (1 to nrow(field_boundaries_sf))
|
||||
unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)])
|
||||
|
||||
for (field_poly_idx in unique_field_ids) {
|
||||
# Get all CI values for this field from this tile
|
||||
field_id <- field_boundaries_sf$field[field_poly_idx]
|
||||
ci_vals <- extracted$CI[extracted$ID == field_poly_idx]
|
||||
ci_vals <- ci_vals[!is.na(ci_vals)]
|
||||
|
||||
# Skip if no data for this field in this tile
|
||||
if (length(ci_vals) == 0) {
|
||||
next
|
||||
}
|
||||
|
||||
# Calculate statistics
|
||||
mean_ci <- mean(ci_vals, na.rm = TRUE)
|
||||
ci_std <- sd(ci_vals, na.rm = TRUE)
|
||||
cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
|
||||
range_min <- min(ci_vals, na.rm = TRUE)
|
||||
range_max <- max(ci_vals, na.rm = TRUE)
|
||||
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
|
||||
ci_percentiles_str <- get_ci_percentiles(ci_vals)
|
||||
|
||||
# Cloud coverage: count total pixels vs non-NA pixels for this field
|
||||
field_rows <- extracted[extracted$ID == field_poly_idx, ]
|
||||
num_total <- nrow(field_rows)
|
||||
num_data <- sum(!is.na(field_rows$CI))
|
||||
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
|
||||
cloud_cat <- if (num_data == 0) "No image available"
|
||||
else if (pct_clear >= 99.5) "Clear view"
|
||||
else "Partial coverage"
|
||||
|
||||
# Age and Phase
|
||||
age_weeks <- if (USE_UNIFORM_AGE) {
|
||||
as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
|
||||
} else {
|
||||
NA_real_
|
||||
}
|
||||
phase <- get_phase_by_age(age_weeks)
|
||||
|
||||
# Germination progress (only for young plants, age < 17 weeks)
|
||||
germination_progress <- NA_character_
|
||||
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) {
|
||||
pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100
|
||||
germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold)
|
||||
}
|
||||
|
||||
# Store result (check if field already exists from another tile)
|
||||
existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id)
|
||||
|
||||
if (length(existing_idx) > 0) {
|
||||
# Field already in results from previous tile - keep first occurrence or average
|
||||
# For now, keep the first one (earlier tiles)
|
||||
next
|
||||
}
|
||||
|
||||
# Store new field result
|
||||
results_list[[length(results_list) + 1]] <- data.frame(
|
||||
Field_id = field_id,
|
||||
Mean_CI = round(mean_ci, 2),
|
||||
CV = round(cv, 4),
|
||||
CI_range = range_str,
|
||||
CI_Percentiles = ci_percentiles_str,
|
||||
Cloud_pct_clear = pct_clear,
|
||||
Cloud_category = cloud_cat,
|
||||
Age_week = round(age_weeks, 1),
|
||||
Phase = phase,
|
||||
Germination_progress = germination_progress,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
fields_processed <- fields_processed + 1
|
||||
}
|
||||
|
||||
message(paste(" Tile", tile_idx, "of", length(tile_files), "processed"))
|
||||
|
||||
}, error = function(e) {
|
||||
message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message))
|
||||
})
|
||||
}
|
||||
|
||||
if (length(results_list) == 0) {
|
||||
stop(paste("No fields processed successfully for week", week_num))
|
||||
}
|
||||
|
||||
stats_df <- dplyr::bind_rows(results_list)
|
||||
message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields"))
|
||||
|
||||
return(stats_df)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# CALCULATE KPI TRENDS (Requires previous week RDS)
|
||||
# ============================================================================
|
||||
|
||||
calculate_kpi_trends <- function(current_stats, prev_stats = NULL) {
|
||||
|
||||
message("Calculating KPI trends from current and previous week data")
|
||||
|
||||
# Initialize new columns with defaults
|
||||
current_stats$Weekly_ci_change <- NA_real_
|
||||
current_stats$CV_Trend_Short_Term <- NA_real_
|
||||
current_stats$nmr_weeks_in_this_phase <- 1L
|
||||
|
||||
# If no previous week data, return with defaults
|
||||
if (is.null(prev_stats) || nrow(prev_stats) == 0) {
|
||||
message(" No previous week data available - using defaults")
|
||||
return(current_stats)
|
||||
}
|
||||
|
||||
# Build lookup indices for previous week (by Field_id)
|
||||
prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)
|
||||
|
||||
# For each field in current week, lookup previous values
|
||||
for (i in seq_len(nrow(current_stats))) {
|
||||
field_id <- current_stats$Field_id[i]
|
||||
prev_idx <- prev_lookup[field_id]
|
||||
|
||||
if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
|
||||
# Field exists in previous week
|
||||
prev_row <- prev_stats[prev_idx, ]
|
||||
|
||||
# Weekly CI change (current Mean_CI - previous Mean_CI)
|
||||
if (!is.na(prev_row$Mean_CI) && !is.na(current_stats$Mean_CI[i])) {
|
||||
current_stats$Weekly_ci_change[i] <-
|
||||
round(current_stats$Mean_CI[i] - prev_row$Mean_CI, 2)
|
||||
}
|
||||
|
||||
# CV short-term trend (current CV - previous CV)
|
||||
if (!is.na(prev_row$CV) && !is.na(current_stats$CV[i])) {
|
||||
current_stats$CV_Trend_Short_Term[i] <-
|
||||
round(current_stats$CV[i] - prev_row$CV, 4)
|
||||
}
|
||||
|
||||
# Weeks in current phase (track phase transitions)
|
||||
if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase)) {
|
||||
if (current_stats$Phase[i] == prev_row$Phase) {
|
||||
# Same phase - increment counter
|
||||
prev_weeks <- if (!is.na(prev_row$nmr_weeks_in_this_phase)) {
|
||||
prev_row$nmr_weeks_in_this_phase
|
||||
} else {
|
||||
1
|
||||
}
|
||||
current_stats$nmr_weeks_in_this_phase[i] <- prev_weeks + 1L
|
||||
} else {
|
||||
# Phase changed - reset to 1
|
||||
current_stats$nmr_weeks_in_this_phase[i] <- 1L
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
message(" Calculated trends for all fields")
|
||||
return(current_stats)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# LOAD OR CALCULATE WEEKLY STATISTICS (RDS Caching)
|
||||
# ============================================================================
|
||||
|
||||
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
|
||||
mosaic_dir, reports_dir, report_date = Sys.Date()) {
|
||||
|
||||
# Build RDS file path
|
||||
rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num)
|
||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
||||
|
||||
# Try to load existing RDS (fast cache)
|
||||
if (file.exists(rds_path)) {
|
||||
message(paste("Loading cached statistics from:", basename(rds_path)))
|
||||
return(readRDS(rds_path))
|
||||
}
|
||||
|
||||
# RDS not found - calculate from tiles
|
||||
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
|
||||
stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year,
|
||||
mosaic_dir, report_date)
|
||||
|
||||
# Create output directory if needed
|
||||
output_dir <- file.path(reports_dir, "kpis", "field_stats")
|
||||
if (!dir.exists(output_dir)) {
|
||||
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
|
||||
# Export RDS (for fast lookup next week)
|
||||
saveRDS(stats_df, rds_path)
|
||||
message(paste("Saved weekly statistics RDS:", basename(rds_path)))
|
||||
|
||||
# Export CSV (for user review)
|
||||
csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num)
|
||||
csv_path <- file.path(output_dir, csv_filename)
|
||||
readr::write_csv(stats_df, csv_path)
|
||||
message(paste("Saved weekly statistics CSV:", basename(csv_path)))
|
||||
|
||||
return(stats_df)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PARALLEL FIELD ANALYSIS FUNCTION
|
||||
# ============================================================================
|
||||
|
|
@ -1316,248 +1550,116 @@ main <- function() {
|
|||
}
|
||||
|
||||
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
||||
message("\nProcessing tiles and extracting field statistics...")
|
||||
all_tile_results <- list()
|
||||
# ============================================================================
|
||||
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
|
||||
# ============================================================================
|
||||
|
||||
for (i in seq_along(tile_files)) {
|
||||
tile_file <- tile_files[i]
|
||||
message(paste(" Processing tile", i, "of", length(tile_files), ":", basename(tile_file)))
|
||||
# Build tile grid (needed by calculate_field_statistics)
|
||||
message("\nBuilding tile grid for current week...")
|
||||
tile_grid <- build_tile_grid(mosaic_dir, current_week, year)
|
||||
|
||||
tryCatch({
|
||||
# Load current tile and previous week tile
|
||||
current_rast <- terra::rast(tile_file)
|
||||
message("\nUsing modular RDS-based approach for weekly statistics...")
|
||||
|
||||
# DEBUG: Check tile structure on first tile
|
||||
if (i == 1) {
|
||||
message(paste(" [DEBUG] Tile CRS:", terra::crs(current_rast)))
|
||||
message(paste(" [DEBUG] Tile extent:", paste(terra::ext(current_rast))))
|
||||
message(paste(" [DEBUG] Field boundaries CRS:", sf::st_crs(field_boundaries_sf)))
|
||||
field_bbox <- sf::st_bbox(field_boundaries_sf)
|
||||
message(paste(" [DEBUG] Field bbox:", paste(round(field_bbox, 2))))
|
||||
message(paste(" [DEBUG] Band names:", paste(names(current_rast), collapse=", ")))
|
||||
}
|
||||
# Load/calculate CURRENT week stats (from tiles or RDS cache)
|
||||
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
|
||||
current_stats <- load_or_calculate_weekly_stats(
|
||||
week_num = current_week,
|
||||
year = year,
|
||||
project_dir = project_dir,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
mosaic_dir = tile_grid$mosaic_dir,
|
||||
reports_dir = reports_dir,
|
||||
report_date = end_date
|
||||
)
|
||||
|
||||
# Extract CI band by name
|
||||
ci_band <- current_rast[["CI"]]
|
||||
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
|
||||
|
||||
# Check if CI band exists - use proper logical checks
|
||||
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
|
||||
message(paste(" ERROR: CI band not found. Available bands:", paste(names(current_rast), collapse=", ")))
|
||||
next
|
||||
}
|
||||
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
|
||||
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
|
||||
prev_stats <- load_or_calculate_weekly_stats(
|
||||
week_num = previous_week,
|
||||
year = year,
|
||||
project_dir = project_dir,
|
||||
field_boundaries_sf = field_boundaries_sf,
|
||||
mosaic_dir = tile_grid$mosaic_dir,
|
||||
reports_dir = reports_dir,
|
||||
report_date = end_date - 7 # Approximately 1 week before
|
||||
)
|
||||
|
||||
# Check if CI band has any valid data
|
||||
if (tryCatch(all(is.na(values(ci_band))), error = function(e) TRUE)) {
|
||||
message(paste(" ERROR: CI band has no valid data"))
|
||||
next
|
||||
}
|
||||
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
|
||||
|
||||
# Load previous week tile if available
|
||||
previous_tile_file <- sub(sprintf("week_%02d", current_week),
|
||||
sprintf("week_%02d", previous_week),
|
||||
tile_file)
|
||||
previous_ci <- NULL
|
||||
if (file.exists(previous_tile_file)) {
|
||||
previous_rast <- terra::rast(previous_tile_file)
|
||||
previous_ci <- previous_rast[["CI"]]
|
||||
}
|
||||
# Apply trend calculations (requires both weeks)
|
||||
message("\n3. Calculating trend columns...")
|
||||
current_stats <- calculate_kpi_trends(current_stats, prev_stats)
|
||||
|
||||
# OPTION 1 + 2: Extract all CI statistics from one pixel extraction (single call)
|
||||
current_stats <- extract_field_statistics_from_ci(ci_band, field_boundaries_sf)
|
||||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, nmr_weeks_in_this_phase"))
|
||||
|
||||
# DEBUG: Check extraction result on first tile
|
||||
if (i == 1) {
|
||||
num_with_data <- sum(!is.na(current_stats$mean_ci))
|
||||
message(paste(" [DEBUG] Extracted", nrow(current_stats), "fields, ", num_with_data, "with non-NA data"))
|
||||
if (num_with_data > 0) {
|
||||
message(paste(" [DEBUG] Sample mean CIs:", paste(head(current_stats$mean_ci[!is.na(current_stats$mean_ci)], 3), collapse=", ")))
|
||||
# Check percentiles
|
||||
sample_field <- which(!is.na(current_stats$mean_ci))[2]
|
||||
message(paste(" [DEBUG] Field", sample_field, "- p10:", current_stats$p10[sample_field],
|
||||
"p90:", current_stats$p90[sample_field],
|
||||
"min:", current_stats$min_ci[sample_field],
|
||||
"max:", current_stats$max_ci[sample_field],
|
||||
"valid_pixels:", current_stats$pixel_count_valid[sample_field]))
|
||||
}
|
||||
}
|
||||
# ============================================================================
|
||||
# Build final output dataframe with all 21 columns
|
||||
# ============================================================================
|
||||
|
||||
# Extract previous week CI statistics if available
|
||||
previous_stats <- NULL
|
||||
if (!is.null(previous_ci)) {
|
||||
previous_stats <- extract_field_statistics_from_ci(previous_ci, field_boundaries_sf)
|
||||
}
|
||||
|
||||
# Process each field that was extracted
|
||||
field_results_this_tile <- list()
|
||||
fields_added <- 0
|
||||
|
||||
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
|
||||
tryCatch({
|
||||
field_id <- field_boundaries_sf$field[field_idx]
|
||||
field_sf <- field_boundaries_sf[field_idx, ]
|
||||
|
||||
# Get statistics from helper function results
|
||||
# current_stats should have same number of rows as field_boundaries_sf
|
||||
if (field_idx > nrow(current_stats)) {
|
||||
message(paste(" [ERROR] field_idx", field_idx, "> nrow(current_stats)", nrow(current_stats)))
|
||||
next
|
||||
}
|
||||
|
||||
mean_ci_current <- current_stats$mean_ci[field_idx]
|
||||
|
||||
# SKIP fields with no data in this tile (they don't intersect this tile)
|
||||
if (is.na(current_stats$pixel_count_valid[field_idx]) || current_stats$pixel_count_valid[field_idx] == 0) {
|
||||
next
|
||||
}
|
||||
ci_cv_current <- current_stats$cv[field_idx]
|
||||
ci_percentile_low <- current_stats$p10[field_idx]
|
||||
ci_percentile_high <- current_stats$p90[field_idx]
|
||||
|
||||
# If field doesn't intersect this tile, mean_ci_current will be NA
|
||||
if (is.na(mean_ci_current)) {
|
||||
next # Skip this field - doesn't intersect this tile
|
||||
}
|
||||
message("\nBuilding final field analysis output...")
|
||||
|
||||
field_analysis_df <- current_stats %>%
|
||||
mutate(
|
||||
# Column 2: Farm_Section (user fills)
|
||||
Farm_Section = NA_character_,
|
||||
# Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
|
||||
Field_name = Field_id,
|
||||
# Column 4: Acreage (calculate from geometry)
|
||||
Acreage = {
|
||||
acreages <- sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
|
||||
field_sf <- field_boundaries_sf[idx, ]
|
||||
field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
|
||||
field_area_acres <- field_area_ha / 0.404686
|
||||
|
||||
# Extract previous week CI if available
|
||||
mean_ci_previous <- NA
|
||||
ci_change <- NA
|
||||
if (!is.null(previous_stats)) {
|
||||
mean_ci_previous <- previous_stats$mean_ci[field_idx]
|
||||
if (!is.na(mean_ci_previous)) {
|
||||
ci_change <- mean_ci_current - mean_ci_previous
|
||||
}
|
||||
}
|
||||
|
||||
# Reconstruct pixel values for status trigger (we need the actual pixel array)
|
||||
# Use the percentiles and mean to create a synthetic distribution for status_trigger
|
||||
# For now, use mean CI repeated by pixel count for testing
|
||||
# TODO: Consider extracting pixels directly if needed for more complex triggers
|
||||
pixel_count_valid <- current_stats$pixel_count_valid[field_idx]
|
||||
ci_vals_current <- if (!is.na(pixel_count_valid) && pixel_count_valid > 0) {
|
||||
rep(mean_ci_current, pixel_count_valid) # Simplified: use mean value repeated
|
||||
} else {
|
||||
numeric(0)
|
||||
}
|
||||
|
||||
# Calculate age
|
||||
age_weeks <- if (!is.null(planting_dates) && nrow(planting_dates) > 0 && field_idx <= nrow(planting_dates)) {
|
||||
planting_date <- planting_dates$date[field_idx]
|
||||
if (!is.na(planting_date)) {
|
||||
as.numeric(difftime(end_date, planting_date, units = "weeks"))
|
||||
} else {
|
||||
0
|
||||
}
|
||||
} else {
|
||||
0
|
||||
}
|
||||
|
||||
# Get phase and status
|
||||
phase <- get_phase_by_age(age_weeks)
|
||||
status_trigger <- get_status_trigger(ci_vals_current, ci_change, age_weeks)
|
||||
|
||||
# Calculate Cloud_pct_clear: percentage of field with valid data
|
||||
# Binned to 10% intervals (0, 10, 20, ..., 90, 100)
|
||||
cloud_pct_clear <- {
|
||||
valid_count <- current_stats$pixel_count_valid[field_idx]
|
||||
total_count <- current_stats$pixel_count_total[field_idx]
|
||||
if (!is.na(valid_count) && !is.na(total_count) && total_count > 0) {
|
||||
pct <- (valid_count / total_count) * 100
|
||||
round(pct / 10) * 10
|
||||
} else {
|
||||
NA_real_
|
||||
}
|
||||
}
|
||||
|
||||
# Cloud categorization based on pixel coverage (Cloud_pct_clear)
|
||||
cloud_category <- if (is.na(cloud_pct_clear)) {
|
||||
"No image available"
|
||||
} else if (cloud_pct_clear >= 90) {
|
||||
"Clear view"
|
||||
} else if (cloud_pct_clear > 0) {
|
||||
"Partial coverage"
|
||||
} else {
|
||||
"No image available"
|
||||
}
|
||||
|
||||
# Get min/max CI values
|
||||
ci_min <- current_stats$min_ci[field_idx]
|
||||
ci_max <- current_stats$max_ci[field_idx]
|
||||
ci_range <- if (!is.na(ci_min) && !is.na(ci_max)) {
|
||||
sprintf("%.1f-%.1f", ci_min, ci_max)
|
||||
} else {
|
||||
NA_character_
|
||||
}
|
||||
|
||||
# Get field_name from field_boundaries_sf
|
||||
field_name <- field_sf$field
|
||||
|
||||
# Build result row (21 columns per specification, in order)
|
||||
result_row <- data.frame(
|
||||
Field_id = field_id,
|
||||
Farm_Section = NA_character_,
|
||||
Field_name = field_name,
|
||||
Acreage = field_area_acres,
|
||||
Mean_CI = mean_ci_current,
|
||||
Weekly_ci_change = ci_change,
|
||||
Four_week_trend = NA_character_,
|
||||
Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE,
|
||||
Age_weeks = age_weeks,
|
||||
Phase = phase,
|
||||
nmr_weeks_in_this_phase = NA_real_,
|
||||
Germination_progress = NA_real_,
|
||||
Imminent_prob = NA_real_,
|
||||
Status_trigger = status_trigger,
|
||||
CI_range = ci_range,
|
||||
CI_Percentiles = if (!is.na(ci_percentile_low) && !is.na(ci_percentile_high)) {
|
||||
sprintf("%.1f-%.1f", ci_percentile_low, ci_percentile_high)
|
||||
} else {
|
||||
NA_character_
|
||||
},
|
||||
CV = ci_cv_current,
|
||||
CV_Trend_Short_Term = NA_real_,
|
||||
CV_Trend_Long_Term = NA_real_,
|
||||
Cloud_pct_clear = cloud_pct_clear,
|
||||
Cloud_category = cloud_category,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
field_results_this_tile[[as.character(field_id)]] <- result_row
|
||||
fields_added <- fields_added + 1
|
||||
|
||||
}, error = function(e) {
|
||||
# Show error for debugging
|
||||
message(paste(" [FIELD ERROR] Field", field_idx, ":", e$message))
|
||||
field_area_ha / 0.404686
|
||||
})
|
||||
}
|
||||
acreages[match(Field_id, field_boundaries_sf$field)]
|
||||
},
|
||||
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
|
||||
# Column 7: Four_week_trend (Phase 3 future)
|
||||
Four_week_trend = NA_character_,
|
||||
# Column 8: Last_harvest_or_planting_date (dummy for now)
|
||||
Last_harvest_or_planting_date = UNIFORM_PLANTING_DATE,
|
||||
# Columns 9-10: Already in current_stats (Age_week, Phase)
|
||||
# Column 11: nmr_weeks_in_this_phase (already calculated)
|
||||
# Column 12: Germination_progress (already calculated)
|
||||
# Column 13: Imminent_prob (placeholder)
|
||||
Imminent_prob = "placeholder data",
|
||||
# Column 14: Status_trigger (need to add)
|
||||
Status_trigger = {
|
||||
triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
|
||||
field_id <- current_stats$Field_id[idx]
|
||||
field_idx <- which(field_boundaries_sf$field == field_id)[1]
|
||||
if (is.na(field_idx)) return(NA_character_)
|
||||
|
||||
if (length(field_results_this_tile) > 0) {
|
||||
all_tile_results[[basename(tile_file)]] <- dplyr::bind_rows(field_results_this_tile)
|
||||
message(paste(" Extracted", length(field_results_this_tile), "fields from tile (processed", fields_added, "fields total)"))
|
||||
} else {
|
||||
message(paste(" WARNING: No fields extracted from this tile (processed", fields_added, "fields, all either NA or errored)"))
|
||||
}
|
||||
# Reconstruct CI values from Mean_CI for status trigger logic
|
||||
# For now, use simplified approach
|
||||
age_w <- current_stats$Age_week[idx]
|
||||
ci_change <- current_stats$Weekly_ci_change[idx]
|
||||
|
||||
}, error = function(e) {
|
||||
message(paste(" Error processing tile", basename(tile_file), ":", e$message))
|
||||
})
|
||||
}
|
||||
# Using mean CI as proxy (could be improved with pixel distribution)
|
||||
ci_vals <- rep(current_stats$Mean_CI[idx], 100)
|
||||
get_status_trigger(ci_vals, ci_change, age_w)
|
||||
})
|
||||
triggers
|
||||
},
|
||||
# Columns 15-16: Already in current_stats (CI_range, CI_Percentiles)
|
||||
# Column 17: Already in current_stats (CV)
|
||||
# Column 18: Already in current_stats (CV_Trend_Short_Term)
|
||||
# Column 19: CV_Trend_Long_Term (Phase 3 future)
|
||||
CV_Trend_Long_Term = NA_real_,
|
||||
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
|
||||
.keep = "all" # Keep all existing columns
|
||||
) %>%
|
||||
select(
|
||||
Field_id, Farm_Section, Field_name, Acreage, Mean_CI, Weekly_ci_change,
|
||||
Four_week_trend, Last_harvest_or_planting_date, Age_week, Phase,
|
||||
nmr_weeks_in_this_phase, Germination_progress, Imminent_prob, Status_trigger,
|
||||
CI_range, CI_Percentiles, CV, CV_Trend_Short_Term, CV_Trend_Long_Term,
|
||||
Cloud_pct_clear, Cloud_category
|
||||
)
|
||||
|
||||
# Combine all tile results, keeping unique fields (may appear in multiple tiles)
|
||||
if (length(all_tile_results) == 0) {
|
||||
stop("No fields extracted from any tiles!")
|
||||
}
|
||||
|
||||
field_analysis_df <- dplyr::bind_rows(all_tile_results) %>%
|
||||
distinct(Field_id, .keep_all = TRUE)
|
||||
|
||||
if (nrow(field_analysis_df) == 0) {
|
||||
stop("No fields analyzed successfully!")
|
||||
}
|
||||
|
||||
message(paste("✓ Analyzed", nrow(field_analysis_df), "fields"))
|
||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
|
||||
|
||||
summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue