updateing csv file

This commit is contained in:
Timon 2026-01-20 12:06:45 +01:00
parent c11b10a73f
commit 4143bdf4d7
3 changed files with 117 additions and 102 deletions

View file

@ -66,17 +66,15 @@ CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
# Negative slope = CV decreasing = field becoming MORE uniform = GOOD
# Positive slope = CV increasing = field becoming MORE patchy = BAD
# Near zero = Homogenous growth (all crops progressing equally)
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.05 # CV decreasing rapidly
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # Gradual synchronization
CV_SLOPE_IMPROVEMENT_MAX <- -0.005 # Becoming uniform
CV_SLOPE_HOMOGENOUS_MIN <- -0.005 # Stable, uniform growth
CV_SLOPE_HOMOGENOUS_MAX <- 0.005 # No change in uniformity
CV_SLOPE_PATCHINESS_MIN <- 0.005 # Minor divergence
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness
CV_SLOPE_SEVERE_MIN <- 0.02 # Field fragmentation beginning
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks)
CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation)
CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks)
CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks)
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks)
CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>3% increase over 8 weeks)
# CLOUD COVER ROUNDING INTERVALS
CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100)
# PERCENTILE CALCULATIONS
CI_PERCENTILE_LOW <- 0.10
@ -391,7 +389,7 @@ main <- function() {
current_week = current_week,
year = year)
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase"))
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# Load weekly harvest probabilities from script 31 (if available)
message("\n4. Loading harvest probabilities from script 31...")
@ -479,7 +477,7 @@ main <- function() {
if (is.na(planting_dt)) {
return(NA_real_)
}
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1)
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
})
},
# Column 10: Phase (recalculate based on updated Age_week)
@ -493,31 +491,23 @@ main <- function() {
NA_character_
})
},
# Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends)
# Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends)
# Column 12: Germination_progress (calculated here from CI values)
Germination_progress = {
sapply(seq_len(nrow(current_stats)), function(idx) {
age_w <- Age_week[idx]
mean_ci_val <- Mean_CI[idx]
# Only relevant for germination phase (0-4 weeks)
if (is.na(age_w) || age_w < 0 || age_w >= 4) {
return(NA_character_)
}
# Estimate % of field with CI >= germination threshold
# Based on mean CI, estimate germination percentage
if (mean_ci_val >= 0.4) {
return(">80%")
} else if (mean_ci_val >= 0.25) {
return("50-80%")
} else if (mean_ci_val >= 0.1) {
return("20-50%")
} else {
return("<20%")
}
})
},
# Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}),
# Column 13: Imminent_prob (from script 31 or NA if not available)
Imminent_prob = {
if (!is.null(imminent_prob_data)) {
@ -526,59 +516,84 @@ main <- function() {
rep(NA_real_, nrow(current_stats))
}
},
# Column 14: Status_trigger (based on harvest probability + growth status)
Status_trigger = {
triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
# Column 14: Status_Alert (based on harvest probability + crop health status)
# Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA
Status_Alert = {
sapply(seq_len(nrow(current_stats)), function(idx) {
imminent_prob <- Imminent_prob[idx]
age_w <- Age_week[idx]
ci_change <- Weekly_ci_change[idx]
phase <- Phase[idx]
weekly_ci_chg <- Weekly_ci_change[idx]
mean_ci_val <- Mean_CI[idx]
# Priority 1: Harvest imminent (high probability)
if (!is.na(imminent_prob) && imminent_prob > 0.5) {
return("harvest_imminent")
# Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months)
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
return("Ready for harvest-check")
}
# Priority 2: Age-based triggers
if (!is.na(age_w)) {
if (age_w >= 45) return("harvest_ready")
if (age_w >= 39) return("maturation_progressing")
if (age_w >= 4 & age_w < 39) return("growth_on_track")
if (age_w < 4) return("germination_started")
# Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5)
if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
return("Strong decline in crop health")
}
# Fallback
# Priority 3: Harvested/bare (Mean CI < 1.5)
if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
return("Harvested/bare")
}
# Fallback: no alert
NA_character_
})
triggers
},
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
# Column 17: Already in current_stats (CV)
# Column 18: Already in current_stats (CV_Trend_Short_Term)
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
# Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
# 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01)
CV_Trend_Long_Term_Category = {
sapply(current_stats$CV_Trend_Long_Term, categorize_cv_slope)
sapply(current_stats$CV_Trend_Long_Term, function(slope) {
if (is.na(slope)) {
return(NA_character_)
} else if (slope < -0.01) {
return("More uniform")
} else if (slope > 0.01) {
return("Less uniform")
} else {
return("Stable uniformity")
}
})
},
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
.keep = "all" # Keep all existing columns
# Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}),
) %>%
select(
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change",
"Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase",
"nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger",
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
"Last_harvest_or_planting_date", "Age_week", "Phase",
"Germination_progress",
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
"Cloud_pct_clear", "Cloud_category")),
any_of(c("CI_range", "CI_Percentiles"))
"Imminent_prob", "Cloud_pct_clear", "Cloud_category"))
)
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
export_paths <- export_field_analysis_excel(
field_analysis_df,
summary_statistics_df,
NULL,
project_dir,
current_week,
year,
@ -586,15 +601,12 @@ main <- function() {
)
cat("\n--- Per-field Results (first 10) ---\n")
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category")
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
if (length(available_cols) > 0) {
print(head(field_analysis_df[, available_cols], 10))
}
cat("\n--- Summary Statistics ---\n")
print(summary_statistics_df)
# ========== FARM-LEVEL KPI AGGREGATION ==========
# Aggregate the per-field analysis into farm-level summary statistics
@ -623,15 +635,15 @@ main <- function() {
farm_summary$phase_distribution <- phase_dist
# 2. STATUS TRIGGER DISTRIBUTION
# 2. STATUS ALERT DISTRIBUTION
status_dist <- field_data %>%
group_by(Status_trigger) %>%
group_by(Status_Alert) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Status_trigger)
rename(Category = Status_Alert)
farm_summary$status_distribution <- status_dist

View file

@ -112,8 +112,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
field_df_rounded <- field_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
summary_df_rounded <- summary_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
# Handle NULL summary_df
summary_df_rounded <- if (!is.null(summary_df)) {
summary_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
} else {
NULL
}
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
if (!dir.exists(output_subdir)) {
@ -124,10 +129,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
excel_path <- file.path(output_subdir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
# Build sheets list dynamically
sheets <- list(
"Field Data" = field_df_rounded,
"Summary" = summary_df_rounded
"Field Data" = field_df_rounded
)
if (!is.null(summary_df_rounded)) {
sheets[["Summary"]] <- summary_df_rounded
}
write_xlsx(sheets, excel_path)
message(paste("✓ Field analysis Excel exported to:", excel_path))

View file

@ -184,12 +184,17 @@ round_cloud_to_intervals <- function(cloud_pct_clear) {
return(NA_character_)
}
if (cloud_pct_clear < 50) return("<50%")
if (cloud_pct_clear < 10) return("0-10%")
if (cloud_pct_clear < 20) return("10-20%")
if (cloud_pct_clear < 30) return("20-30%")
if (cloud_pct_clear < 40) return("30-40%")
if (cloud_pct_clear < 50) return("40-50%")
if (cloud_pct_clear < 60) return("50-60%")
if (cloud_pct_clear < 70) return("60-70%")
if (cloud_pct_clear < 80) return("70-80%")
if (cloud_pct_clear < 90) return("80-90%")
return(">90%")
if (cloud_pct_clear < 95) return("90-95%")
return("95-100%")
}
get_ci_percentiles <- function(ci_values) {
@ -420,12 +425,18 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
ci_percentiles_str <- get_ci_percentiles(ci_vals)
# Count pixels with CI >= 2 (germination threshold)
GERMINATION_CI_THRESHOLD <- 2.0
num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE)
num_pixels_total <- length(ci_vals)
pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
field_rows <- extracted[extracted$ID == field_poly_idx, ]
num_total <- nrow(field_rows)
num_data <- sum(!is.na(field_rows$CI))
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
cloud_cat <- if (num_data == 0) "No image available"
else if (pct_clear >= 99.5) "Clear view"
else if (pct_clear >= 95) "Clear view"
else "Partial coverage"
# Age_week and Phase are now calculated in main script using actual planting dates
@ -440,9 +451,10 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
results_list[[length(results_list) + 1]] <- data.frame(
Field_id = field_id,
Mean_CI = round(mean_ci, 2),
CV = round(cv, 4),
CV = round(cv * 100, 2),
CI_range = range_str,
CI_Percentiles = ci_percentiles_str,
Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
Cloud_pct_clear = pct_clear,
Cloud_category = cloud_cat,
stringsAsFactors = FALSE
@ -482,7 +494,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
current_stats$CV_Trend_Short_Term <- NA_real_
current_stats$Four_week_trend <- NA_real_
current_stats$CV_Trend_Long_Term <- NA_real_
current_stats$nmr_weeks_in_this_phase <- 1L
current_stats$nmr_of_weeks_analysed <- 1L
if (is.null(prev_stats) || nrow(prev_stats) == 0) {
message(" No previous week data available - using defaults")
@ -502,7 +514,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
if (length(analysis_files) > 0) {
recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE,
col_select = c(Field_id, nmr_weeks_in_this_phase, Phase))
col_select = c(Field_id, nmr_of_weeks_analysed, Phase))
}
}
}, error = function(e) {
@ -510,7 +522,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
})
if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
message(paste(" Using previous field_analysis to track nmr_weeks_in_this_phase"))
message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed"))
}
historical_4weeks <- list()
@ -643,30 +655,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
dplyr::filter(Field_id == field_id)
if (nrow(prev_analysis_row) > 0) {
prev_phase_analysis <- prev_analysis_row$Phase[1]
prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1]
prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1]
if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) {
if (current_stats$Phase[i] == prev_phase_analysis) {
current_stats$nmr_weeks_in_this_phase[i] <-
if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L
} else {
current_stats$nmr_weeks_in_this_phase[i] <- 1L
}
}
} else if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) {
if (current_stats$Phase[i] == prev_row$Phase[1]) {
current_stats$nmr_weeks_in_this_phase[i] <- 2L
# Only increment nmr_of_weeks_analysed if we have previous data
if (!is.na(prev_nmr_weeks_analysis)) {
current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L
} else {
current_stats$nmr_weeks_in_this_phase[i] <- 1L
}
}
} else {
if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) {
if (current_stats$Phase[i] == prev_row$Phase[1]) {
current_stats$nmr_weeks_in_this_phase[i] <- 2L
} else {
current_stats$nmr_weeks_in_this_phase[i] <- 1L
current_stats$nmr_of_weeks_analysed[i] <- 1L
}
}
}