Cleanup: Fix CI formula, reorganize shell scripts and test files
- Fixed CI calculation: changed from the NDVI formula, (NIR - Red) / (NIR + Red), to the correct CI formula, (NIR / Green) - 1, in:
  * process_single_tile() function
  * create_ci_band() utility function
  * Updated create_mask_and_crop() documentation
- Renamed numbered shell scripts for clarity (matching R script numbering):
  * 01_run_planet_download -> 10_planet_download.sh
  * 02_run_ci_extraction -> 20_ci_extraction.sh
  * 03_run_growth_model -> 30_growth_model.sh
  * 04_run_mosaic_creation -> 40_mosaic_creation.sh
  * 09_run_calculate_kpis -> 80_calculate_kpis.sh
  * 10_run_kpi_report -> 90_kpi_report.sh
- Archived obsolete shell scripts to old_sh/:
  * build_mosaic.sh, build_report.sh, interpolate_growth_model.sh
  * 05_run_dashboard_report.sh, 06_run_crop_messaging.sh
  * 11_run_yield_prediction.sh/ps1
  * runcane.sh, runpython.sh, smartcane.sh, update_RDS.sh
- Deleted test/debug files and temporary outputs:
  * analyze_*.R, benchmark_gpu_vs_cpu.py, convert_angata_harvest.py
  * debug_mosaic.R, examine_kpi_results.R, generate_sar_report.R
  * inspect_8band_structure.R, inspect_tif_bands.R
  * old_working_utils.R, predict_harvest_operational.R
  * run_kpi_calculation.R, run_report.R, simple_sar_test.R
  * data_validation_tool/, harvest_ci_pattern_analysis.png, kpi_debug.out
- Enhanced harvest prediction: added threshold tuning (0.40-0.45) and field type handling
- Enhanced mosaic creation: improved tile detection and routing logic
This commit is contained in:
parent
d365b5838b
commit
458b8247be
512
.Rhistory
512
.Rhistory
|
|
@ -1,512 +0,0 @@
|
|||
message("No project_dir provided. Using default:", project_dir)
|
||||
}
|
||||
# Make project_dir available globally so parameters_project.R can use it
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
# Initialize project configuration and load utility functions.
# The two files are sourced from the working directory first; if that fails,
# the copies under "r_app" (resolved with here::here) are tried before the
# script gives up.
tryCatch({
  source("parameters_project.R")
  source("growth_model_utils.R")
}, error = function(e) {
  # Include the real failure: the error may be a missing file, but it can just
  # as well be an error raised from inside one of the sourced scripts.
  warning(
    "Default source files not found. Attempting to source from 'r_app' directory. Reason: ",
    conditionMessage(e),
    call. = FALSE
  )
  tryCatch({
    source(here::here("r_app", "parameters_project.R"))
    source(here::here("r_app", "growth_model_utils.R"))
    # A successful fallback is status information, not a problem, so it is
    # reported with message() instead of warning().
    message("Successfully sourced files from 'r_app' directory.")
  }, error = function(e2) {
    stop(
      "Failed to source required files from both default and 'r_app' directories. Last error: ",
      conditionMessage(e2),
      call. = FALSE
    )
  })
})
|
||||
log_message("Starting CI growth model interpolation")
|
||||
# Load and process the data
|
||||
tryCatch({
|
||||
# Load the combined CI data
|
||||
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
|
||||
# Validate harvesting data
|
||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||
stop("No harvesting data available")
|
||||
}
|
||||
# Get the years from harvesting data
|
||||
years <- harvesting_data %>%
|
||||
filter(!is.na(season_start)) %>%
|
||||
distinct(year) %>%
|
||||
pull(year)
|
||||
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||
# Generate interpolated CI data for each year and field
|
||||
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
||||
# Calculate growth metrics and save the results
|
||||
if (nrow(CI_all) > 0) {
|
||||
# Add daily and cumulative metrics
|
||||
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
||||
# Save the processed data
|
||||
save_growth_model(
|
||||
CI_all_with_metrics,
|
||||
cumulative_CI_vals_dir,
|
||||
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||
)
|
||||
} else {
|
||||
log_message("No CI data was generated after interpolation", level = "WARNING")
|
||||
}
|
||||
log_message("Growth model interpolation completed successfully")
|
||||
}, error = function(e) {
|
||||
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
|
||||
stop(e$message)
|
||||
})
|
||||
View(CI_all_with_metrics)
|
||||
View(CI_data)
|
||||
# Get the years from harvesting data
|
||||
years <- harvesting_data %>%
|
||||
filter(!is.na(season_start)) %>%
|
||||
distinct(year) %>%
|
||||
pull(year)
|
||||
years
|
||||
View(CI_all)
|
||||
View(CI_all_with_metrics)
|
||||
years
|
||||
harvesting_data
|
||||
ci_data
|
||||
ci_data = CI_data
|
||||
# Process each year
|
||||
result <- purrr::map_df(years, function(yr) {
|
||||
safe_log(paste("Processing year:", yr))
|
||||
# Get the fields harvested in this year with valid season start dates
|
||||
sub_fields <- harvesting_data %>%
|
||||
dplyr::filter(year == yr, !is.na(season_start)) %>%
|
||||
dplyr::pull(sub_field)
|
||||
if (length(sub_fields) == 0) {
|
||||
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
# Filter sub_fields to only include those with value data in ci_data
|
||||
valid_sub_fields <- sub_fields %>%
|
||||
purrr::keep(~ any(ci_data$sub_field == .x))
|
||||
if (length(valid_sub_fields) == 0) {
|
||||
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
|
||||
return(data.frame())
|
||||
}
|
||||
# Extract and interpolate data for each valid field
|
||||
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
|
||||
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
|
||||
harvesting_data = harvesting_data,
|
||||
field_CI_data = ci_data,
|
||||
season = yr)) %>%
|
||||
purrr::list_rbind()
|
||||
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
|
||||
return(result)
|
||||
})
|
||||
CI_all_with_metrics
|
||||
CI_all <- CI_all %>%
|
||||
group_by(Date, field, season) %>%
|
||||
filter(!(field == "00F25" & season == 2023 & duplicated(DOY)))
|
||||
View(CI_all)
|
||||
# Add daily and cumulative metrics
|
||||
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
||||
# Save the processed data
|
||||
save_growth_model(
|
||||
CI_all_with_metrics,
|
||||
cumulative_CI_vals_dir,
|
||||
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||
)
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
ci_plot_type <- params$ci_plot_type
|
||||
colorblind_friendly <- params$colorblind_friendly
|
||||
facet_by_season <- params$facet_by_season
|
||||
x_axis_unit <- params$x_axis_unit
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
# Load all packages at once with suppressPackageStartupMessages
|
||||
suppressPackageStartupMessages({
|
||||
library(here)
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
library(tidyverse)
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
library(knitr)
|
||||
library(tidyr)
|
||||
})
|
||||
# Load custom utility functions
|
||||
tryCatch({
|
||||
source("report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
# Set the project directory from parameters
|
||||
project_dir <- params$data_dir
|
||||
# Source project parameters with error handling
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading parameters_project.R: ", e$message)
|
||||
})
|
||||
# Log initial configuration
|
||||
safe_log("Starting the R Markdown script with KPIs")
|
||||
safe_log(paste("mail_day params:", params$mail_day))
|
||||
safe_log(paste("report_date params:", params$report_date))
|
||||
safe_log(paste("mail_day variable:", mail_day))
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||
# Candidate filenames we expect (exact and common variants)
|
||||
expected_summary_names <- c(
|
||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
||||
"kpi_summary_tables.rds",
|
||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
||||
)
|
||||
expected_field_details_names <- c(
|
||||
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_field_details.rds"),
|
||||
"field_details.rds"
|
||||
)
|
||||
# Locate the first candidate file that exists inside a directory.
#
# dir:        path of the directory to look in.
# candidates: character vector of file names, in order of preference.
#
# Returns the full path (file.path(dir, name)) of the first candidate that
# exists, or NULL when the directory does not exist or none of the candidates
# is present. Nothing is read from disk; only the path is returned.
try_load_from_dir <- function(dir, candidates) {
  if (dir.exists(dir)) {
    candidate_paths <- file.path(dir, candidates)
    hits <- candidate_paths[file.exists(candidate_paths)]
    if (length(hits) > 0) {
      return(hits[[1]])
    }
  }
  NULL
}
|
||||
# Try primary directory first
|
||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
||||
# List rds files under laravel_app/storage/app recursively
|
||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||
# Try to match by expected names
|
||||
if (is.null(summary_file)) {
|
||||
matched <- files[basename(files) %in% expected_summary_names]
|
||||
if (length(matched) > 0) summary_file <- matched[1]
|
||||
}
|
||||
if (is.null(field_details_file)) {
|
||||
matched2 <- files[basename(files) %in% expected_field_details_names]
|
||||
if (length(matched2) > 0) field_details_file <- matched2[1]
|
||||
}
|
||||
}
|
||||
# Final checks and load with safe error messages.
# Both result objects start as NULL so that later code can test them with
# is.null() even when a file was never found, and so that a value left over
# from an earlier run cannot be mistaken for freshly loaded data.
summary_tables <- NULL
field_details_table <- NULL

if (!is.null(summary_file) && file.exists(summary_file)) {
  safe_log(paste("Loading KPI summary from:", summary_file))
  summary_tables <- tryCatch(
    readRDS(summary_file),
    error = function(e) {
      safe_log(paste("Failed to read summary RDS:", e$message), "ERROR")
      NULL
    }
  )
} else {
  safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
}

if (!is.null(field_details_file) && file.exists(field_details_file)) {
  safe_log(paste("Loading field details from:", field_details_file))
  field_details_table <- tryCatch(
    readRDS(field_details_file),
    error = function(e) {
      safe_log(paste("Failed to read field details RDS:", e$message), "ERROR")
      NULL
    }
  )
} else {
  safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
}

# The flag is TRUE only when BOTH files were loaded. The previous
# `kpi_files_exist && TRUE` update was a no-op, so a missing or unreadable
# field-details file still left the flag TRUE once the summary had loaded.
kpi_files_exist <- !is.null(summary_tables) && !is.null(field_details_table)

if (kpi_files_exist) {
  safe_log("✓ KPI summary tables loaded successfully")
} else {
  safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
}
|
||||
# Set locale for consistent date formatting
|
||||
Sys.setlocale("LC_TIME", "C")
|
||||
# Initialize date variables from parameters
|
||||
today <- as.character(report_date)
|
||||
mail_day_as_character <- as.character(mail_day)
|
||||
# Calculate report dates and weeks
|
||||
report_date_obj <- as.Date(today)
|
||||
current_week <- as.numeric(format(report_date_obj, "%U"))
|
||||
year <- as.numeric(format(report_date_obj, "%Y"))
|
||||
# Calculate dates for weekly analysis
|
||||
week_start <- report_date_obj - ((as.numeric(format(report_date_obj, "%w")) + 1) %% 7)
|
||||
week_end <- week_start + 6
|
||||
# Calculate week days (copied from 05 script for compatibility)
|
||||
report_date_as_week_day <- weekdays(lubridate::ymd(today))
|
||||
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
|
||||
# Calculate initial week number
|
||||
week <- lubridate::week(today) - 1
|
||||
safe_log(paste("Initial week calculation:", week, "today:", today))
|
||||
# Calculate previous dates for comparisons
|
||||
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
|
||||
# Adjust week calculation based on mail day
|
||||
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
|
||||
safe_log("Adjusting weeks because of mail day")
|
||||
week <- lubridate::week(today) + 1
|
||||
today_minus_1 <- as.character(lubridate::ymd(today))
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
|
||||
}
|
||||
# Calculate week numbers for previous weeks
|
||||
week_minus_1 <- week - 1
|
||||
week_minus_2 <- week - 2
|
||||
week_minus_3 <- week - 3
|
||||
# Format current week with leading zeros
|
||||
week <- sprintf("%02d", week)
|
||||
safe_log(paste("Report week:", current_week, "Year:", year))
|
||||
safe_log(paste("Week range:", week_start, "to", week_end))
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
||||
date_suffix <- format(as.Date(report_date), "%Y%m%d")
|
||||
# Candidate filenames we expect (exact and common variants)
|
||||
expected_summary_names <- c(
|
||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
||||
"kpi_summary_tables.rds",
|
||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
||||
)
|
||||
expected_field_details_names <- c(
|
||||
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_field_details.rds"),
|
||||
"field_details.rds"
|
||||
)
|
||||
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
|
||||
try_load_from_dir <- function(dir, candidates) {
|
||||
if (!dir.exists(dir)) return(NULL)
|
||||
for (name in candidates) {
|
||||
f <- file.path(dir, name)
|
||||
if (file.exists(f)) return(f)
|
||||
}
|
||||
return(NULL)
|
||||
}
|
||||
# Try primary directory first
|
||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
||||
# List rds files under laravel_app/storage/app recursively
|
||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||
# Try to match by expected names
|
||||
if (is.null(summary_file)) {
|
||||
matched <- files[basename(files) %in% expected_summary_names]
|
||||
if (length(matched) > 0) summary_file <- matched[1]
|
||||
}
|
||||
if (is.null(field_details_file)) {
|
||||
matched2 <- files[basename(files) %in% expected_field_details_names]
|
||||
if (length(matched2) > 0) field_details_file <- matched2[1]
|
||||
}
|
||||
}
|
||||
# Final checks and load with safe error messages
|
||||
kpi_files_exist <- FALSE
|
||||
if (!is.null(summary_file) && file.exists(summary_file)) {
|
||||
safe_log(paste("Loading KPI summary from:", summary_file))
|
||||
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
|
||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||
} else {
|
||||
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
||||
}
|
||||
if (!is.null(field_details_file) && file.exists(field_details_file)) {
|
||||
safe_log(paste("Loading field details from:", field_details_file))
|
||||
field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL })
|
||||
if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE
|
||||
} else {
|
||||
safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
|
||||
}
|
||||
if (kpi_files_exist) {
|
||||
safe_log("✓ KPI summary tables loaded successfully")
|
||||
} else {
|
||||
safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
|
||||
}
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis")
|
||||
kpi_data_dir
|
||||
kpi_data_dir
|
||||
## SIMPLE KPI LOADING - robust lookup with fallbacks
|
||||
# Primary expected directory inside the laravel storage
|
||||
kpi_data_dir <- file.path(here("laravel_app", "storage", "app", project_dir, "reports", "kpis"))
|
||||
kpi_data_dir
|
||||
# Candidate filenames we expect (exact and common variants)
|
||||
expected_summary_names <- c(
|
||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
||||
"kpi_summary_tables.rds",
|
||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
||||
)
|
||||
expected_field_details_names <- c(
|
||||
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
|
||||
paste0(project_dir, "_field_details.rds"),
|
||||
"field_details.rds"
|
||||
)
|
||||
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
|
||||
try_load_from_dir <- function(dir, candidates) {
|
||||
if (!dir.exists(dir)) return(NULL)
|
||||
for (name in candidates) {
|
||||
f <- file.path(dir, name)
|
||||
if (file.exists(f)) return(f)
|
||||
}
|
||||
return(NULL)
|
||||
}
|
||||
# Try primary directory first
|
||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
||||
# List rds files under laravel_app/storage/app recursively
|
||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||
# Try to match by expected names
|
||||
if (is.null(summary_file)) {
|
||||
matched <- files[basename(files) %in% expected_summary_names]
|
||||
if (length(matched) > 0) summary_file <- matched[1]
|
||||
}
|
||||
if (is.null(field_details_file)) {
|
||||
matched2 <- files[basename(files) %in% expected_field_details_names]
|
||||
if (length(matched2) > 0) field_details_file <- matched2[1]
|
||||
}
|
||||
}
|
||||
# Final checks and load with safe error messages
|
||||
kpi_files_exist <- FALSE
|
||||
if (!is.null(summary_file) && file.exists(summary_file)) {
|
||||
safe_log(paste("Loading KPI summary from:", summary_file))
|
||||
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
|
||||
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
|
||||
} else {
|
||||
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
|
||||
}
|
||||
summary_file
|
||||
kpi_data_dir
|
||||
library(officer)
|
||||
library(flextable)
|
||||
# Data setup
|
||||
summary_tables <- list()
|
||||
summary_tables$field_uniformity_summary <- data.frame(
|
||||
"Uniformity Level" = c("Excellent", "Good", "Poor"),
|
||||
"Count" = c(15, 8, 3),
|
||||
"Percent" = c("62.5%", "33.3%", "12.5%")
|
||||
)
|
||||
summary_tables$weed_presence_summary <- data.frame(
|
||||
"Weed Risk Level" = c("Low", "Moderate", "High"),
|
||||
"Field Count" = c(18, 6, 2),
|
||||
"Percent" = c("75.0%", "25.0%", "8.3%")
|
||||
)
|
||||
doc <- read_docx()
|
||||
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
|
||||
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
|
||||
doc <- body_add_par(doc, "This section demonstrates just two KPI tables side by side.", style = "Normal")
|
||||
doc <- body_add_section(doc, prop_section(
|
||||
section_type = "continuous",
|
||||
columns = columns(widths = c(4.25, 4.25))
|
||||
))
|
||||
doc <- body_add_flextable(doc, flextable(summary_tables$field_uniformity_summary) %>% set_caption("Field Uniformity Summary"))
|
||||
doc <- body_add_break(doc, "column")
|
||||
doc <- body_add_flextable(doc, flextable(summary_tables$weed_presence_summary) %>% set_caption("Weed Presence Score Summary"))
|
||||
doc <- body_add_section(doc, prop_section(
|
||||
section_type = "continuous",
|
||||
columns = columns(widths = c(8.5))
|
||||
))
|
||||
doc <- body_add_par(doc, "This is a test report to verify the KPI grid layout.", style = "Normal")
|
||||
print(doc, target = "tables_side_by_side.docx")
|
||||
here()
|
||||
getwd()
|
||||
print(doc, target = "tables_side_by_side.docx")
|
||||
doc
|
||||
print(doc, target = "tables_side_by_side.docx")
|
||||
print(doc, target = "r_app/tables_side_by_side.docx")
|
||||
library(officer)
|
||||
library(flextable)
|
||||
# Create example data
|
||||
summary_tables <- list()
|
||||
summary_tables$field_uniformity_summary <- data.frame(
|
||||
"Uniformity Level" = c("Excellent", "Good", "Poor"),
|
||||
"Count" = c(15, 8, 3),
|
||||
"Percent" = c("62.5%", "33.3%", "12.5%")
|
||||
)
|
||||
summary_tables$weed_presence_summary <- data.frame(
|
||||
"Weed Risk Level" = c("Low", "Moderate", "High"),
|
||||
"Field Count" = c(18, 6, 2),
|
||||
"Percent" = c("75.0%", "25.0%", "8.3%")
|
||||
)
|
||||
# Create document
|
||||
doc <- read_docx()
|
||||
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
|
||||
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
|
||||
doc <- body_add_par(doc, "This section demonstrates just two KPI tables side by side.", style = "Normal")
|
||||
# Two-column section
|
||||
doc <- body_add_section(doc, prop_section(
|
||||
section_type = "continuous",
|
||||
columns = columns(widths = c(4.25, 4.25))
|
||||
))
|
||||
library(officer)
|
||||
library(flextable)
|
||||
# Create example data
|
||||
summary_tables <- list()
|
||||
summary_tables$field_uniformity_summary <- data.frame(
|
||||
"Uniformity Level" = c("Excellent", "Good", "Poor"),
|
||||
"Count" = c(15, 8, 3),
|
||||
"Percent" = c("62.5%", "33.3%", "12.5%")
|
||||
)
|
||||
summary_tables$weed_presence_summary <- data.frame(
|
||||
"Weed Risk Level" = c("Low", "Moderate", "High"),
|
||||
"Field Count" = c(18, 6, 2),
|
||||
"Percent" = c("75.0%", "25.0%", "8.3%")
|
||||
)
|
||||
# Create document
|
||||
doc <- read_docx()
|
||||
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
|
||||
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
|
||||
doc <- body_add_par(doc, "This section demonstrates just two KPI tables side by side.", style = "Normal")
|
||||
# Two-column section
|
||||
doc <- body_add_section(doc, prop_section(
|
||||
section_type = "continuous",
|
||||
columns = columns(widths = c(4.25, 4.25))
|
||||
))
|
||||
packageVersion("officer")
|
||||
??body_add_section
|
||||
library(officer)
|
||||
?body_add_section
|
||||
library(officer)
|
||||
library(flextable)
|
||||
# Create example data
|
||||
ft1 <- flextable(data.frame(
|
||||
"Uniformity Level" = c("Excellent", "Good", "Poor"),
|
||||
"Count" = c(15, 8, 3),
|
||||
"Percent" = c("62.5%", "33.3%", "12.5%")
|
||||
)) %>% set_caption("Field Uniformity Summary")
|
||||
ft2 <- flextable(data.frame(
|
||||
"Weed Risk Level" = c("Low", "Moderate", "High"),
|
||||
"Field Count" = c(18, 6, 2),
|
||||
"Percent" = c("75.0%", "25.0%", "8.3%")
|
||||
)) %>% set_caption("Weed Presence Score Summary")
|
||||
doc <- read_docx()
|
||||
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
|
||||
library(dplyr)
|
||||
# Create example data
|
||||
ft1 <- flextable(data.frame(
|
||||
"Uniformity Level" = c("Excellent", "Good", "Poor"),
|
||||
"Count" = c(15, 8, 3),
|
||||
"Percent" = c("62.5%", "33.3%", "12.5%")
|
||||
)) %>% set_caption("Field Uniformity Summary")
|
||||
ft2 <- flextable(data.frame(
|
||||
"Weed Risk Level" = c("Low", "Moderate", "High"),
|
||||
"Field Count" = c(18, 6, 2),
|
||||
"Percent" = c("75.0%", "25.0%", "8.3%")
|
||||
)) %>% set_caption("Weed Presence Score Summary")
|
||||
doc <- read_docx()
|
||||
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
|
||||
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
|
||||
doc <- body_add_par(doc, "This section demonstrates two KPI tables side by side.", style = "Normal")
|
||||
# Create a Word table (1 row, 2 columns)
|
||||
doc <- body_add_table(doc, value = data.frame(A = "", B = ""), style = "Table Grid")
|
||||
# Move cursor to first cell, insert first flextable
|
||||
doc <- cursor_forward(doc)
|
||||
doc <- slip_in_flextable(doc, ft1, pos = "on")
|
||||
# Move cursor to second cell, insert second flextable
|
||||
doc <- cursor_forward(doc)
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Run planet_download for Kibos since September 2023 till today
|
||||
# Usage: ./01_run_planet_download.sh --project_dir=kibos --date=2023-09-01 --days=<number_of_days>
|
||||
|
||||
project_dir="kibos"
|
||||
date="2023-09-01"
|
||||
days=1
|
||||
bbox=""
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--days=*)
|
||||
days="${arg#*=}"
|
||||
;;
|
||||
--date=*)
|
||||
date="${arg#*=}"
|
||||
;;
|
||||
--project_dir=*)
|
||||
project_dir="${arg#*=}"
|
||||
;;
|
||||
--bbox=*)
|
||||
bbox="${arg#*=}"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $arg"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
echo "Running planet_download for $project_dir from $date for $days days."
|
||||
script_dir="$(dirname "$0")"
|
||||
source "$script_dir/python_app/myenv/bin/activate"
|
||||
jupyter nbconvert --execute --to script --stdout "$script_dir/python_app/planet_download.ipynb"
|
||||
deactivate
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Run ci_extraction.R
|
||||
# Usage: ./02_run_ci_extraction.sh --end_date=<YYYY-MM-DD> --offset=<days> --project_dir=kibos
|
||||
|
||||
end_date=$(date +'%Y-%m-%d')
|
||||
offset=28
|
||||
project_dir="kibos"
|
||||
|
||||
for arg in "$@"; do
|
||||
case $arg in
|
||||
--end_date=*)
|
||||
end_date="${arg#*=}"
|
||||
;;
|
||||
--offset=*)
|
||||
offset="${arg#*=}"
|
||||
;;
|
||||
--project_dir=*)
|
||||
project_dir="${arg#*=}"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $arg"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
echo "Running ci_extraction.R for $project_dir with end_date $end_date and offset $offset."
|
||||
cd r_app
|
||||
Rscript 02_ci_extraction.R $end_date $offset $project_dir
|
||||
cd ..
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Run mosaic_creation.R
|
||||
# Usage: ./04_run_mosaic_creation.sh --end_date=<YYYY-MM-DD> --offset=<days> --data_dir=kibos --file_name_tif=<filename>
|
||||
|
||||
end_date="$(date +%Y-%m-%d)"
|
||||
offset=7
|
||||
data_dir="kibos"
|
||||
file_name_tif="week_03_2024.tif"
|
||||
|
||||
for arg in "$@"; do
|
||||
case $arg in
|
||||
--offset=*)
|
||||
offset="${arg#*=}"
|
||||
;;
|
||||
--end_date=*)
|
||||
end_date="${arg#*=}"
|
||||
;;
|
||||
--data_dir=*)
|
||||
data_dir="${arg#*=}"
|
||||
;;
|
||||
--file_name_tif=*)
|
||||
file_name_tif="${arg#*=}"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $arg"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
echo "Running mosaic_creation.R for $data_dir with end_date $end_date, offset $offset, file $file_name_tif."
|
||||
cd r_app
|
||||
Rscript 04_mosaic_creation.R $end_date $offset $data_dir $file_name_tif
|
||||
cd ..
|
||||
50
10_planet_download.sh
Normal file
50
10_planet_download.sh
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
#!/bin/bash
# Execute the Planet download notebook for one project.
#
# Usage:
#   ./10_planet_download.sh [--date=YYYY-MM-DD] [--days=N] [--project_dir=NAME] [--bbox=BBOX]
#
# The parsed options are handed to the notebook through the exported
# environment variables DAYS, DATE, PROJECT_DIR and BBOX.
# Exit status: 1 on an unknown option or when the virtual environment cannot
# be activated; otherwise the exit status of the notebook execution.

date=$(date +%Y-%m-%d)
# Default value for days
days=1
project_dir="chemba"
# Explicit empty default, so a bbox variable inherited from the caller's
# environment is never forwarded by accident.
bbox=""

# Loop over all arguments. The word list of the loop is expanded once, so no
# `shift` is needed while iterating.
for arg in "$@"; do
  case "$arg" in
    --days=*)
      days="${arg#*=}"
      ;;
    --date=*)
      date="${arg#*=}"
      ;;
    --project_dir=*)
      project_dir="${arg#*=}"
      ;;
    --bbox=*)
      bbox="${arg#*=}"
      ;;
    *)
      echo "Onbekende optie: $arg"
      exit 1
      ;;
  esac
done

# Show the effective settings
echo "Datum: $date"
echo "Aantal dagen: $days"
echo "Project directory: $project_dir"
echo "BBOX: $bbox"

# Activate the virtual environment; stop if that fails so the notebook is not
# executed with whatever interpreter happens to be on PATH.
script_dir="$(dirname "$0")"
if ! source "$script_dir/python_app/myenv/bin/activate"; then
  echo "Could not activate virtual environment: $script_dir/python_app/myenv/bin/activate" >&2
  exit 1
fi
echo "$script_dir/python_app/planet_download.ipynb"
export DAYS="$days"
export DATE="$date"
export PROJECT_DIR="$project_dir"
export BBOX="$bbox"

# Run the download step.
# TODO: needs to be calling 00_download_8band_pu_optimized.py instead of the notebook directly
jupyter nbconvert --execute --to script --stdout "$script_dir/python_app/planet_download.ipynb"
status=$?

# Deactivate the virtual environment, then report the notebook's own exit
# status instead of the (always successful) status of `deactivate`.
deactivate
exit "$status"
|
||||
37
20_ci_extraction.sh
Normal file
37
20_ci_extraction.sh
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#!/bin/bash
# Run 20_ci_extraction.R for one project.
#
# Usage:
#   ./20_ci_extraction.sh [--end_date=YYYY-MM-DD] [--offset=DAYS] [--project_dir=NAME]
#
# The R script is started from ../r_app (relative to the current working
# directory) and receives end_date, offset and project_dir as positional
# arguments, in that order.

end_date=$(date +'%Y-%m-%d')
offset=28
project_dir="Bagamoyo_trial"

# Parse command line arguments. The word list of the loop is expanded once,
# so no `shift` is needed while iterating.
for arg in "$@"; do
  case $arg in
    --end_date=*)
      end_date="${arg#*=}"
      ;;
    --offset=*)
      offset="${arg#*=}"
      ;;
    --project_dir=*)
      project_dir="${arg#*=}"
      ;;
    *)
      echo "Unknown option: $arg"
      exit 1
      ;;
  esac
done

echo "end_date: $end_date"
echo "offset: $offset"

# Check if required arguments are set. Because every option has a default,
# this only triggers when an option was passed with an empty value.
if [ -z "$end_date" ] || [ -z "$project_dir" ] || [ -z "$offset" ]; then
  echo "Missing arguments. Use: 20_ci_extraction.sh --end_date=2024-01-01 --offset=28 --project_dir=Bagamoyo_trial"
  exit 1
fi

echo "20_ci_extraction.R $end_date $offset $project_dir"

# Abort if the directory change fails, so Rscript never runs from the wrong
# working directory.
cd ../r_app || { echo "Cannot change directory to ../r_app" >&2; exit 1; }
Rscript 20_ci_extraction.R "$end_date" "$offset" "$project_dir"
|
||||
|
|
@ -17,6 +17,6 @@ for arg in "$@"; do
|
|||
done
|
||||
|
||||
echo "Running interpolate_growth_model.R for $project_dir."
|
||||
cd r_app
|
||||
Rscript 03_interpolate_growth_model.R $project_dir
|
||||
cd ../r_app
|
||||
Rscript 30_interpolate_growth_model.R $project_dir
|
||||
cd ..
|
||||
43
40_mosaic_creation.sh
Normal file
43
40_mosaic_creation.sh
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
# Wrapper that runs 40_mosaic_creation.R from ../r_app.
#
# Usage:
#   40_mosaic_creation.sh [--end_date=YYYY-MM-DD] [--offset=N]
#                         [--data_dir=NAME] [--file_name_tif=FILE]
#
# Every option has a default (see below). The script changes into ../r_app
# relative to the current working directory, so it has to be started from a
# directory that sits next to r_app.

# Defaults, used when the matching option is not given.
end_date="2024-06-08"
offset=7
data_dir="chemba"
file_name_tif="week_03_2024.tif"

# Parse --key=value options; anything else aborts the script.
for arg in "$@"; do
  case $arg in
    --offset=*)
      offset="${arg#*=}"
      ;;
    --end_date=*)
      end_date="${arg#*=}"
      ;;
    --data_dir=*)
      data_dir="${arg#*=}"
      ;;
    --file_name_tif=*)
      file_name_tif="${arg#*=}"
      ;;
    *)
      echo "Unknown option: $arg"
      exit 1
      ;;
  esac
done

echo "offset: $offset"
echo "end_date: $end_date"

# An option passed with an empty value (e.g. --offset=) replaces its default
# with an empty string, so the values still have to be checked.
if [ -z "$end_date" ] || [ -z "$data_dir" ] || [ -z "$offset" ] || [ -z "$file_name_tif" ]; then
  # The option is spelled --end_date; the parser above rejects anything else.
  echo "Missing arguments. Use: 40_mosaic_creation.sh --end_date=2024-01-01 --offset=7 --data_dir=chemba --file_name_tif=week_03_2024.tif"
  exit 1
fi

echo "40_mosaic_creation.R $end_date $offset $data_dir $file_name_tif"

# Stop if r_app is not reachable instead of running Rscript in the wrong place.
cd ../r_app || { echo "Cannot change directory to ../r_app" >&2; exit 1; }

# Quote the values so each one reaches R as exactly one argument.
Rscript 40_mosaic_creation.R "$end_date" "$offset" "$data_dir" "$file_name_tif"
|
||||
|
|
@ -7,10 +7,18 @@
|
|||
# and ensures proper R execution with renv environment and error handling.
|
||||
|
||||
# Script configuration
|
||||
SCRIPT_NAME="09_run_calculate_kpis.sh"
|
||||
R_SCRIPT_NAME="09_calculate_kpis.R"
|
||||
SCRIPT_NAME="80_calculate_kpis.sh"
|
||||
R_SCRIPT_NAME="80_calculate_kpis.R"
|
||||
LOG_PREFIX="[KPI_CALC]"
|
||||
|
||||
|
||||
project_dir="tz11_mbigiri_john_trial"
|
||||
offset=7
|
||||
end_date=$(date +"%Y-%m-%d")
|
||||
|
||||
|
||||
|
||||
|
||||
# Function to log messages with timestamp
|
||||
log_message() {
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') $LOG_PREFIX $1"
|
||||
|
|
@ -40,76 +48,60 @@ check_directory() {
|
|||
|
||||
# Main execution function
|
||||
main() {
|
||||
R_CMD ="Rscript"
|
||||
log_message "Starting KPI calculation pipeline step"
|
||||
|
||||
# Check if we're in the correct directory
|
||||
if [ ! -f "r_app/$R_SCRIPT_NAME" ]; then
|
||||
handle_error "Must be run from smartcane root directory (where r_app/ folder exists)"
|
||||
if [ ! -f "../r_app/$R_SCRIPT_NAME" ]; then
|
||||
handle_error "Must be run from lavevel_app directory (where ../r_app/ folder exists)"
|
||||
fi
|
||||
|
||||
# Check for R installation
|
||||
if ! command -v R &> /dev/null; then
|
||||
# Try Windows R installation path
|
||||
R_CMD="C:/Program Files/R/R-4.4.3/bin/x64/R.exe"
|
||||
if [ ! -f "$R_CMD" ]; then
|
||||
handle_error "R not found in PATH or at expected Windows location"
|
||||
fi
|
||||
else
|
||||
R_CMD="R"
|
||||
fi
|
||||
|
||||
log_message "Using R at: $R_CMD"
|
||||
|
||||
# Set default project directory if not provided
|
||||
if [ -z "$1" ]; then
|
||||
PROJECT_DIR="esa"
|
||||
log_message "No project directory specified, using default: $PROJECT_DIR"
|
||||
else
|
||||
PROJECT_DIR="$1"
|
||||
log_message "Using project directory: $PROJECT_DIR"
|
||||
fi
|
||||
log_message "Using project directory: $project_dir"
|
||||
|
||||
# Check if project directory exists
|
||||
PROJECT_PATH="laravel_app/storage/app/$PROJECT_DIR"
|
||||
check_directory "$PROJECT_PATH" || handle_error "Project directory not found: $PROJECT_PATH"
|
||||
project_path="../laravel_app/storage/app/$project_dir"
|
||||
check_directory "$project_path" || handle_error "Project directory not found: $project_path"
|
||||
|
||||
# Check for required data files
|
||||
check_file "$PROJECT_PATH/Data/pivot.geojson"
|
||||
check_file "$project_path/Data/pivot.geojson"
|
||||
|
||||
# Check for weekly mosaic directory
|
||||
MOSAIC_DIR="$PROJECT_PATH/weekly_mosaic"
|
||||
check_directory "$MOSAIC_DIR" || handle_error "Weekly mosaic directory not found: $MOSAIC_DIR"
|
||||
mosaic_dir="$project_path/weekly_mosaic"
|
||||
check_directory "$mosaic_dir" || handle_error "Weekly mosaic directory not found: $mosaic_dir"
|
||||
|
||||
# Count available mosaics
|
||||
MOSAIC_COUNT=$(find "$MOSAIC_DIR" -name "week_*.tif" 2>/dev/null | wc -l)
|
||||
if [ "$MOSAIC_COUNT" -lt 1 ]; then
|
||||
handle_error "No weekly mosaics found in $MOSAIC_DIR"
|
||||
mosaic_count=$(find "$mosaic_dir" -name "week_*.tif" 2>/dev/null | wc -l)
|
||||
if [ "$mosaic_count" -lt 1 ]; then
|
||||
handle_error "No weekly mosaics found in $mosaic_dir"
|
||||
fi
|
||||
log_message "Found $MOSAIC_COUNT weekly mosaics in $MOSAIC_DIR"
|
||||
log_message "Found $mosaic_count weekly mosaics in $mosaic_dir"
|
||||
|
||||
# Create temporary R script with project configuration
|
||||
TEMP_R_SCRIPT="temp_kpi_calc_$$.R"
|
||||
cat > "r_app/$TEMP_R_SCRIPT" << EOF
|
||||
temp_r_script="temp_kpi_calc_$$.R"
|
||||
cat > "../r_app/$temp_r_script" << EOF
|
||||
# Temporary KPI calculation script
|
||||
# Generated by $SCRIPT_NAME on $(date)
|
||||
|
||||
# Set project directory
|
||||
project_dir <- "$PROJECT_DIR"
|
||||
# project_dir <- "$PROJECT_DIR"
|
||||
|
||||
# Set working directory to r_app
|
||||
setwd("r_app")
|
||||
#setwd("r_app")
|
||||
|
||||
# Source the main KPI calculation script
|
||||
tryCatch({
|
||||
source("$R_SCRIPT_NAME")
|
||||
cat("✓ KPI calculation completed successfully\\n")
|
||||
cat("✓ KPI calculation completed successfully!!n")
|
||||
}, error = function(e) {
|
||||
cat("✗ Error in KPI calculation:", e\$message, "\\n")
|
||||
quit(status = 1)
|
||||
})
|
||||
EOF
|
||||
|
||||
log_message "Created temporary R script: r_app/$TEMP_R_SCRIPT"
|
||||
log_message "Created temporary R script: r_app/$temp_r_script"
|
||||
|
||||
# Execute R script
|
||||
log_message "Starting R execution..."
|
||||
|
|
@ -124,12 +116,17 @@ EOF
|
|||
R_EXIT_CODE=$?
|
||||
else
|
||||
# Unix/Linux execution
|
||||
"$R_CMD" --vanilla < "r_app/$TEMP_R_SCRIPT"
|
||||
cd r_app
|
||||
|
||||
log_message "calling $R_CMD $temp_r_script "
|
||||
Rscript "$temp_r_script" "$end_date" "$offset" "$project_dir"
|
||||
|
||||
R_EXIT_CODE=$?
|
||||
fi
|
||||
|
||||
|
||||
# Clean up temporary script
|
||||
rm -f "r_app/$TEMP_R_SCRIPT"
|
||||
rm -f "../r_app/$temp_r_script"
|
||||
log_message "Cleaned up temporary R script"
|
||||
|
||||
# Check R execution result
|
||||
|
|
@ -137,7 +134,7 @@ EOF
|
|||
log_message "✓ KPI calculation completed successfully"
|
||||
|
||||
# Check if output files were created
|
||||
REPORTS_DIR="laravel_app/storage/app/$PROJECT_DIR/reports"
|
||||
REPORTS_DIR="../laravel_app/storage/app/$project_dir/reports"
|
||||
if check_directory "$REPORTS_DIR/kpis"; then
|
||||
KPI_FILES=$(find "$REPORTS_DIR/kpis" -name "*$(date '+%Y%m%d')*" 2>/dev/null | wc -l)
|
||||
if [ "$KPI_FILES" -gt 0 ]; then
|
||||
|
|
@ -156,18 +153,21 @@ EOF
|
|||
|
||||
# Script usage information
|
||||
usage() {
|
||||
echo "Usage: $0 [PROJECT_DIR]"
|
||||
echo "Usage: $0 --project_dir=[PROJECT_DIR] --offset=[number] --end-date=[date]"
|
||||
echo ""
|
||||
echo "Calculate KPI metrics for SmartCane monitoring system"
|
||||
echo ""
|
||||
echo "Parameters:"
|
||||
echo " PROJECT_DIR Project directory name (default: esa)"
|
||||
echo " --project_dir Project directory name (default: esa)"
|
||||
echo " Must exist in laravel_app/storage/app/"
|
||||
echo ""
|
||||
echo " --offset (default: 7)"
|
||||
echo ""
|
||||
echo " --end-date (default: $(date +%Y-%m-%d))"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " $0 # Use default 'esa' project"
|
||||
echo " $0 aura # Use 'aura' project"
|
||||
echo " $0 chemba # Use 'chemba' project"
|
||||
echo " $0 --project_dir=aura --offset=7 # Use 'aura' project with offset 7"
|
||||
echo ""
|
||||
echo "Requirements:"
|
||||
echo " - R installation (4.4.3 or compatible)"
|
||||
|
|
@ -176,13 +176,41 @@ usage() {
|
|||
echo " - Field boundaries in PROJECT_DIR/Data/pivot.geojson"
|
||||
}
|
||||
|
||||
# Handle command line arguments
|
||||
case "${1:-}" in
|
||||
## Parse command line arguments
|
||||
for arg in "$@"; do
|
||||
case $arg in
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
main "$@"
|
||||
--offset=*)
|
||||
offset="${arg#*=}"
|
||||
;;
|
||||
esac
|
||||
--end_date=*)
|
||||
end_date="${arg#*=}"
|
||||
;;
|
||||
--project_dir=*)
|
||||
project_dir="${arg#*=}"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $arg"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Validate required arguments. All of them have defaults, so this only
# triggers when an option is passed with an empty value (e.g. --offset=).
|
||||
# ----------------------------------------------------------------
|
||||
if [[ -z "$project_dir" || -z "$offset" || -z "$end_date" ]]; then
|
||||
echo "❌ Missing required arguments." >&2
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# -------------------------------------
|
||||
# Run main
|
||||
# -------------------------------------
|
||||
main
|
||||
|
|
@ -51,6 +51,6 @@ done
|
|||
|
||||
echo "Running CI report with KPIs for $data_dir, report date $report_date, mail day $mail_day."
|
||||
echo "Parameters: borders=$borders, ci_plot_type=$ci_plot_type, colorblind=$colorblind_friendly, facet_by_season=$facet_by_season, x_axis_unit=$x_axis_unit"
|
||||
cd r_app
|
||||
Rscript -e "rmarkdown::render('10_CI_report_with_kpis_simple.Rmd', output_file='$filename', params=list(report_date='$report_date', mail_day='$mail_day', data_dir='$data_dir', borders='$borders', ci_plot_type='$ci_plot_type', colorblind_friendly='$colorblind_friendly', facet_by_season='$facet_by_season', x_axis_unit='$x_axis_unit'))"
|
||||
cd ../r_app
|
||||
Rscript -e "rmarkdown::render('90_CI_report_with_kpis_simple.Rmd', output_file='$filename', params=list(report_date='$report_date', mail_day='$mail_day', data_dir='$data_dir', borders='$borders', ci_plot_type='$ci_plot_type', colorblind_friendly='$colorblind_friendly', facet_by_season='$facet_by_season', x_axis_unit='$x_axis_unit'))"
|
||||
cd ..
|
||||
|
|
@ -1,180 +0,0 @@
|
|||
# Analyze timing between CI threshold crossings and actual harvest dates
# Goal: Determine how soon after CI drops below threshold the harvest actually occurs
suppressPackageStartupMessages({
  library(readxl)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(here)
  library(ggplot2)
})

# Set project directory. It is also assigned into the global environment
# before parameters_project.R is sourced.
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))

# CI thresholds whose crossing dates are compared with the harvest date.
thresholds <- c(3.0, 2.5, 2.0, 1.8)

# Find the date on which CI starts a run of three consecutive values below
# `threshold`, searching only from the LAST value above `mature_level` onwards.
# Anchoring on the last mature value targets the mature -> harvest transition
# rather than the previous cycle's harvest.
#
# ci           numeric vector of CI values, ordered by date
# dates        Date vector, same length and order as `ci`
# threshold    CI level that has to be undercut
# mature_level CI level above which the crop counts as mature
#
# Returns a length-one Date; NA when there is no mature value, no room for
# three values after it, or no such run.
find_crossing_date <- function(ci, dates, threshold, mature_level = 3.5) {
  no_crossing <- as.Date(NA)

  mature_idx <- which(!is.na(ci) & ci > mature_level)
  if (length(mature_idx) == 0) {
    return(no_crossing)
  }

  first_candidate <- max(mature_idx)
  last_candidate <- length(ci) - 2
  # Guard against a descending range, which would scan rows BEFORE the
  # mature point.
  if (first_candidate > last_candidate) {
    return(no_crossing)
  }

  below <- !is.na(ci) & ci < threshold
  candidates <- first_candidate:last_candidate
  hits <- candidates[
    below[candidates] & below[candidates + 1] & below[candidates + 2]
  ]
  if (length(hits) == 0) {
    return(no_crossing)
  }
  dates[hits[1]]
}

# Read daily CI data
ci_rds_file <- here(
  "laravel_app/storage/app", project_dir,
  "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()

time_series_daily <- ci_data_raw %>%
  mutate(date = as.Date(Date)) %>%
  select(field_id = field, date, ci = FitData) %>%
  arrange(field_id, date)

# Read actual harvest data, resolved the same way as the CI file instead of
# through a hard-coded path relative to the working directory.
harvest_file <- here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")
harvest_actual <- read_excel(harvest_file) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  filter(!is.na(season_end))

cat("=== ANALYZING CI THRESHOLD CROSSING TIMING ===\n\n")

# For each actual harvest, find when CI first dropped below various thresholds.
# seq_len() keeps the loop empty when there are no harvest rows.
results <- lapply(seq_len(nrow(harvest_actual)), function(i) {
  harvest_field <- harvest_actual$field[i]
  harvest_date <- harvest_actual$season_end[i]

  # Get CI data for this field in the year before harvest
  field_data <- time_series_daily %>%
    filter(
      field_id == harvest_field,
      date >= (harvest_date - 365),
      date <= harvest_date
    ) %>%
    arrange(date)

  if (nrow(field_data) == 0) {
    return(NULL)
  }

  result_row <- data.frame(
    field = harvest_field,
    harvest_date = harvest_date,
    ci_at_harvest = field_data$ci[nrow(field_data)]
  )

  for (threshold in thresholds) {
    crossing_date <- find_crossing_date(field_data$ci, field_data$date, threshold)
    # Both values are typed NA when no crossing was found.
    result_row[[paste0("first_below_", threshold)]] <- as.character(crossing_date)
    result_row[[paste0("days_before_", threshold)]] <-
      as.numeric(harvest_date - crossing_date)
  }

  result_row
})

# NULL entries (harvests without CI data) are dropped by bind_rows().
timing_analysis <- bind_rows(results)

if (nrow(timing_analysis) == 0) {
  stop("No harvest events with matching CI data found", call. = FALSE)
}

# Print summary statistics
cat("\n=== TIMING STATISTICS: Days from threshold crossing to actual harvest ===\n\n")

for (threshold in thresholds) {
  days_col <- paste0("days_before_", threshold)
  days_before <- timing_analysis[[days_col]]
  days_before <- days_before[!is.na(days_before)]

  if (length(days_before) > 0) {
    cat(sprintf("CI < %.1f threshold:\n", threshold))
    cat(sprintf("  Valid cases: %d/%d (%.1f%%)\n",
                length(days_before), nrow(timing_analysis),
                100 * length(days_before) / nrow(timing_analysis)))
    cat(sprintf("  Mean: %.1f days before harvest\n", mean(days_before)))
    cat(sprintf("  Median: %.1f days before harvest\n", median(days_before)))
    cat(sprintf("  Range: %.1f to %.1f days\n", min(days_before), max(days_before)))
    cat(sprintf("  Q1-Q3: %.1f to %.1f days\n",
                quantile(days_before, 0.25), quantile(days_before, 0.75)))

    # Count how many harvests occur within specific time windows after crossing
    within_7d <- sum(days_before >= 0 & days_before <= 7)
    within_14d <- sum(days_before >= 0 & days_before <= 14)
    within_21d <- sum(days_before >= 0 & days_before <= 21)
    within_30d <- sum(days_before >= 0 & days_before <= 30)

    cat(sprintf("  Harvest timing after crossing:\n"))
    cat(sprintf("    0-7 days: %d (%.1f%%)\n", within_7d, 100 * within_7d / length(days_before)))
    cat(sprintf("    0-14 days: %d (%.1f%%)\n", within_14d, 100 * within_14d / length(days_before)))
    cat(sprintf("    0-21 days: %d (%.1f%%)\n", within_21d, 100 * within_21d / length(days_before)))
    cat(sprintf("    0-30 days: %d (%.1f%%)\n", within_30d, 100 * within_30d / length(days_before)))
    cat("\n")
  } else {
    cat(sprintf("CI < %.1f threshold: No valid crossings found\n\n", threshold))
  }
}

# Show detailed table for fields with mismatches
cat("\n=== DETAILED TIMING BY FIELD ===\n")

# Get column names dynamically (only the first two thresholds are shown)
days_cols <- grep("days_before_", names(timing_analysis), value = TRUE)
select_cols <- c("field", "harvest_date", "ci_at_harvest",
                 days_cols[seq_len(min(2, length(days_cols)))])

print(timing_analysis %>%
        select(all_of(select_cols)) %>%
        arrange(field, harvest_date), n = 100)

# Create visualization
cat("\n=== Creating timing distribution plot ===\n")
timing_long <- timing_analysis %>%
  select(field, harvest_date, starts_with("days_before_")) %>%
  pivot_longer(cols = starts_with("days_before_"),
               names_to = "threshold",
               values_to = "days_before") %>%
  filter(!is.na(days_before)) %>%
  mutate(threshold = gsub("days_before_", "CI < ", threshold))

timing_plot <- ggplot(timing_long, aes(x = days_before, fill = threshold)) +
  geom_histogram(binwidth = 7, alpha = 0.7, position = "identity") +
  facet_wrap(~threshold, ncol = 1) +
  geom_vline(xintercept = c(7, 14, 21), linetype = "dashed", color = "red", alpha = 0.5) +
  labs(
    title = "Time from CI Threshold Crossing to Actual Harvest",
    subtitle = "How many days AFTER CI drops below threshold does harvest actually occur?",
    x = "Days from threshold crossing to harvest",
    y = "Count of harvest events",
    caption = "Dashed lines at 7, 14, 21 days"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Print explicitly: auto-printing does not happen when the file is run through
# source(), which would leave the PNG empty.
png("timing_threshold_to_harvest.png", width = 1200, height = 800, res = 120)
print(timing_plot)
invisible(dev.off())

cat("\nPlot saved to: timing_threshold_to_harvest.png\n")
|
||||
|
|
@ -1,197 +0,0 @@
|
|||
# Analyze CI drop patterns to distinguish harvest from anomalies
# Goal: Identify characteristics of true harvest drops vs single-day noise

suppressPackageStartupMessages({
  library(readxl)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(here)
  library(ggplot2)
})

# Project to analyze. It is also assigned into the global environment before
# parameters_project.R is sourced.
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))

# Read daily CI data
ci_rds_file <- here(
  "laravel_app/storage/app", project_dir,
  "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()

# Per-field daily series with neighbouring values, so every row can be judged
# against the days around it.
time_series_daily <- ci_data_raw %>%
  mutate(date = as.Date(Date)) %>%
  select(field_id = field, date, ci = FitData) %>%
  arrange(field_id, date) %>%
  group_by(field_id) %>%
  mutate(
    # Calculate changes
    ci_lag1 = lag(ci, 1),
    ci_lag2 = lag(ci, 2),
    ci_lead1 = lead(ci, 1),
    ci_lead2 = lead(ci, 2),
    ci_lead3 = lead(ci, 3),

    # Drop magnitude
    drop_1day = ci_lag1 - ci,
    drop_2day = ci_lag2 - ci,

    # Recovery after drop
    recovery_1day = ci_lead1 - ci,
    recovery_2day = ci_lead2 - ci,
    recovery_3day = ci_lead3 - ci,

    # Is this a single-day anomaly?
    is_spike_drop = (ci < 2.0 & ci_lag1 > 3.0 & ci_lead1 > 3.0)
  ) %>%
  ungroup()

# Read actual harvest data, resolved the same way as the CI file instead of
# through a hard-coded path relative to the working directory.
harvest_file <- here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")
harvest_actual <- read_excel(harvest_file) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  filter(!is.na(season_end))

cat("=== ANALYZING CI DROP PATTERNS ===\n\n")

# Find all instances where CI drops below 2.0
all_drops <- time_series_daily %>%
  filter(ci < 2.0, ci_lag1 > 2.0) %>% # First day below 2.0
  select(field_id, date, ci, ci_lag1, drop_1day,
         ci_lead1, ci_lead2, ci_lead3,
         recovery_1day, recovery_2day, recovery_3day)

# Classify drops based on what happens next
drops_classified <- all_drops %>%
  mutate(
    drop_type = case_when(
      # Spike: drops but recovers to >3.0 within 3 days
      !is.na(ci_lead1) & ci_lead1 > 3.0 ~ "SPIKE (1-day anomaly)",
      !is.na(ci_lead2) & ci_lead2 > 3.0 ~ "SPIKE (2-day anomaly)",
      !is.na(ci_lead3) & ci_lead3 > 3.0 ~ "SPIKE (3-day anomaly)",

      # Sustained: stays below 2.5 for at least 3 days
      !is.na(ci_lead1) & !is.na(ci_lead2) & !is.na(ci_lead3) &
        ci_lead1 < 2.5 & ci_lead2 < 2.5 & ci_lead3 < 2.5 ~ "SUSTAINED (likely harvest)",

      TRUE ~ "UNCLEAR (insufficient data)"
    ),

    sharp_drop = drop_1day > 1.0 # Drop >1 CI point
  )

cat("=== DROP TYPE DISTRIBUTION ===\n")
drop_summary <- drops_classified %>%
  count(drop_type) %>%
  mutate(percent = 100 * n / sum(n)) %>%
  arrange(desc(n))

print(drop_summary)

cat("\n=== SHARP DROPS (>1.0 CI point) ===\n")
sharp_summary <- drops_classified %>%
  filter(sharp_drop) %>%
  count(drop_type) %>%
  mutate(percent = 100 * n / sum(n))

print(sharp_summary)

# Match drops to actual harvests
cat("\n=== MATCHING DROPS TO ACTUAL HARVESTS ===\n")

# The join pairs a drop with EVERY recorded harvest of its field. Only the
# nearest harvest is kept per drop; otherwise a field with several harvests
# contributes the same drop several times, mostly as "far from harvest", and
# the proximity percentages below are diluted.
drops_with_harvest <- drops_classified %>%
  left_join(
    harvest_actual %>%
      select(field, actual_harvest_date = season_end),
    by = c("field_id" = "field")
  ) %>%
  filter(!is.na(actual_harvest_date)) %>%
  mutate(days_from_harvest = as.numeric(date - actual_harvest_date)) %>%
  group_by(field_id, date) %>%
  slice_min(abs(days_from_harvest), n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  mutate(
    near_harvest = abs(days_from_harvest) <= 14,
    timing_category = case_when(
      days_from_harvest >= -7 & days_from_harvest <= 7 ~ "Within 1 week of harvest",
      days_from_harvest >= -14 & days_from_harvest <= 14 ~ "Within 2 weeks of harvest",
      days_from_harvest >= -21 & days_from_harvest <= 21 ~ "Within 3 weeks of harvest",
      TRUE ~ "Far from harvest (>3 weeks)"
    )
  )

cat("\n=== DROP TYPES BY PROXIMITY TO ACTUAL HARVEST ===\n")
harvest_proximity_summary <- drops_with_harvest %>%
  count(drop_type, timing_category) %>%
  pivot_wider(names_from = timing_category, values_from = n, values_fill = 0)

print(harvest_proximity_summary)

# Key insight: What % of SUSTAINED drops are near harvest vs SPIKE drops?
cat("\n=== KEY INSIGHT: Are sustained drops near harvest? ===\n")
sustained_near_harvest <- drops_with_harvest %>%
  filter(grepl("SUSTAINED", drop_type)) %>%
  summarise(
    total = n(),
    near_harvest = sum(near_harvest),
    percent_near = 100 * near_harvest / total
  )

spike_near_harvest <- drops_with_harvest %>%
  filter(grepl("SPIKE", drop_type)) %>%
  summarise(
    total = n(),
    near_harvest = sum(near_harvest),
    percent_near = 100 * near_harvest / total
  )

cat("\nSUSTAINED drops (CI stays low):\n")
cat(sprintf("  Total: %d\n", sustained_near_harvest$total))
cat(sprintf("  Near harvest (±14d): %d (%.1f%%)\n",
            sustained_near_harvest$near_harvest,
            sustained_near_harvest$percent_near))

cat("\nSPIKE drops (CI recovers quickly):\n")
cat(sprintf("  Total: %d\n", spike_near_harvest$total))
cat(sprintf("  Near harvest (±14d): %d (%.1f%%)\n",
            spike_near_harvest$near_harvest,
            spike_near_harvest$percent_near))

# Analyze recovery patterns
cat("\n=== RECOVERY PATTERNS (how fast does CI bounce back?) ===\n")

recovery_stats <- drops_classified %>%
  filter(!is.na(recovery_3day)) %>%
  group_by(drop_type) %>%
  summarise(
    count = n(),
    mean_recovery_1d = mean(recovery_1day, na.rm = TRUE),
    mean_recovery_2d = mean(recovery_2day, na.rm = TRUE),
    mean_recovery_3d = mean(recovery_3day, na.rm = TRUE),
    median_recovery_1d = median(recovery_1day, na.rm = TRUE),
    median_recovery_2d = median(recovery_2day, na.rm = TRUE),
    median_recovery_3d = median(recovery_3day, na.rm = TRUE)
  )

print(recovery_stats)

# Show examples of each type
cat("\n=== EXAMPLES: SPIKE (false alarm) ===\n")
print(drops_classified %>%
        filter(drop_type == "SPIKE (1-day anomaly)") %>%
        select(field_id, date, ci_lag1, ci, ci_lead1, drop_1day, recovery_1day) %>%
        head(10), n = 10)

cat("\n=== EXAMPLES: SUSTAINED (likely harvest) ===\n")
print(drops_classified %>%
        filter(drop_type == "SUSTAINED (likely harvest)") %>%
        select(field_id, date, ci_lag1, ci, ci_lead1, ci_lead2, ci_lead3, drop_1day) %>%
        head(10), n = 10)

# Recommendation
cat("\n=== RECOMMENDATION ===\n")
cat("To avoid false alarms from single-day spikes:\n")
cat("1. Require CI to stay below 2.0 for at least 3 consecutive days\n")
cat("2. Check that CI doesn't recover above 3.0 within next 3 days\n")
cat("3. Sharp drops (>1.0 CI) that sustain are strong harvest signals\n")
cat("4. Trade-off: Waiting 3 days for confirmation delays alert by 3 days\n")
cat("   - But eliminates false positives from cloud noise\n")
cat("   - Harvest still detected 4-11 days before actual event (median 7d)\n")
|
||||
|
|
@ -1,136 +0,0 @@
|
|||
# R script to analyze image dates and missing weeks.
# Reports, per year: number of images, missing ISO weeks, the longest run of
# missing weeks and the longest run of weeks that only have incomplete images.
library(dplyr)
library(lubridate)
library(ggplot2)

# Length of the longest run of consecutive integers in `weeks` (0 if empty).
# A new run starts wherever the gap to the previous week is not exactly 1;
# counting the members of each run gives the run lengths. The earlier
# rle(c(1, diff(w)) == 1) form left out the first week of every run except the
# very first one, so e.g. weeks 3, 7, 8 were reported as 1 instead of 2.
longest_consecutive_run <- function(weeks) {
  if (length(weeks) == 0) {
    return(0L)
  }
  sorted_weeks <- sort(unique(weeks))
  run_id <- cumsum(c(TRUE, diff(sorted_weeks) != 1))
  max(tabulate(run_id))
}

# Weeks that are expected to have images for `y`: up to the current week for
# the current year, weeks 1-52 otherwise.
expected_weeks <- function(y, current_year, current_week) {
  if (y == current_year) {
    seq_len(current_week)
  } else {
    1:52
  }
}

# Set folder path
folder <- "laravel_app/storage/app/esa/merged_final_tif"
files <- list.files(folder, pattern = "\\.tif$", full.names = FALSE)

if (length(files) == 0) {
  stop("No .tif files found in ", folder, call. = FALSE)
}

# Extract dates and file sizes. The file name without its extension is parsed
# as a date by as.Date().
dates <- as.Date(sub("\\.tif$", "", files))
sizes_kb <- file.info(file.path(folder, files))$size / 1024

# The ISO week is paired with the ISO year: around New Year the calendar year
# and the ISO week disagree (1 January can fall in ISO week 52 or 53 of the
# previous year), which would book an image under the wrong year/week.
df <- data.frame(date = dates, size_kb = sizes_kb, file = files) %>%
  mutate(
    year = isoyear(date),
    week = isoweek(date),
    completeness = ifelse(size_kb >= 9000, "Complete", "Incomplete")
  )

# Get all years in data
years <- sort(unique(df$year))

current_year <- isoyear(Sys.Date())
current_week <- isoweek(Sys.Date())

# For plotting: count complete/incomplete images per week/year, with explicit
# zero rows for year/week combinations that have no image.
image_counts <- count(df, year, week, completeness, name = "n_images")
plot_weeks <- expand.grid(
  year = years,
  week = 1:52,
  completeness = c("Complete", "Incomplete"),
  stringsAsFactors = FALSE
) %>%
  left_join(image_counts, by = c("year", "week", "completeness")) %>%
  mutate(n_images = coalesce(n_images, 0L))

# Plot: X = week, Y = number of images, fill = completeness, one panel per year
gg <- ggplot(plot_weeks, aes(x = week, y = n_images, fill = completeness)) +
  geom_col(position = "stack") +
  facet_wrap(~ year, ncol = 1) +
  scale_x_continuous(breaks = 1:52) +
  scale_y_continuous(breaks = 0:max(plot_weeks$n_images)) +
  labs(x = "Week number", y = "Number of images", fill = "Completeness",
       title = "Complete vs Incomplete Images per Week (by Year)") +
  theme_minimal()

ggsave("images_per_week_by_year_stacked.png", gg, width = 12, height = 10)
cat("Plot saved as images_per_week_by_year_stacked.png\n")

# Per-year availability summary. Rows are collected in a list and bound once
# instead of growing a data frame with rbind() inside the loop.
missing_weeks_list <- list()
summary_rows <- vector("list", length(years))

for (i in seq_along(years)) {
  y <- years[i]
  all_weeks <- expected_weeks(y, current_year, current_week)
  weeks_with_images <- unique(df$week[df$year == y])
  weeks_missing <- setdiff(all_weeks, weeks_with_images)
  n_weeks_missing <- length(weeks_missing)
  n_images <- sum(df$year == y)

  # Average over the expected weeks that actually have images.
  n_weeks_covered <- length(all_weeks) - n_weeks_missing
  avg_images_per_week <- if (n_weeks_covered > 0) {
    n_images / n_weeks_covered
  } else {
    NA_real_
  }

  summary_rows[[i]] <- data.frame(
    year = y,
    n_images = n_images,
    n_weeks_missing = n_weeks_missing,
    max_consec_weeks_missing = longest_consecutive_run(weeks_missing),
    avg_images_per_week = round(avg_images_per_week, 2)
  )

  if (n_weeks_missing > 0) {
    missing_weeks_list[[as.character(y)]] <- weeks_missing
  }
}

output <- bind_rows(summary_rows)

# Write to CSV
print(output)

write.csv(output, file = "image_availability_by_year.csv", row.names = FALSE)

# Print missing weeks for years with missing data
for (y in names(missing_weeks_list)) {
  cat(sprintf("Year %s missing weeks: %s\n", y, paste(missing_weeks_list[[y]], collapse = ", ")))
}

# Calculate and print max consecutive weeks with only incomplete data per year
cat("\nMax consecutive weeks with only incomplete images per year:\n")
for (y in years) {
  # Weeks where all images are incomplete (no complete images)
  weeks_without_complete <- plot_weeks$week[
    plot_weeks$year == y &
      plot_weeks$completeness == "Complete" &
      plot_weeks$n_images == 0
  ]
  # Only keep weeks that actually have at least one image (i.e., not missing entirely)
  weeks_with_any_image <- unique(df$week[df$year == y])
  weeks_incomplete <- intersect(weeks_without_complete, weeks_with_any_image)

  max_consec_incomplete <- longest_consecutive_run(weeks_incomplete)
  cat(sprintf("Year %d: %d\n", y, max_consec_incomplete))
}
|
||||
}
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import time
|
||||
|
||||
print("=" * 80)
|
||||
print("PYTORCH GPU vs CPU BENCHMARK TEST")
|
||||
print("=" * 80)
|
||||
|
||||
# Model definition
|
||||
class SimpleModel(nn.Module):
    """Small fully connected network used as the benchmark workload.

    Layers: Linear(784 -> 1000), ReLU, Linear(1000 -> 1000), ReLU,
    Linear(1000 -> 10). The final layer's output is returned without an
    activation.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so parameter initialisation
        # consumes the random number generator in a fixed sequence.
        self.fc1 = nn.Linear(784, 1000)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc3 = nn.Linear(1000, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run `x` through the three layers and return the last layer's output."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)
|
||||
|
||||
# Dummy data - larger dataset
x = torch.randn(100000, 784)
y = torch.randint(0, 10, (100000,))

# Loss function
criterion = nn.CrossEntropyLoss()


def _timed_training(model, optimizer, inputs, targets, epochs=20):
    """Train `model` on one full batch for `epochs` steps and return the seconds taken.

    Uses the module-level `criterion` as the loss. The loss is printed
    every fifth epoch. When `inputs` lives on a CUDA device the device is
    synchronised before the clock starts and before it stops, because CUDA
    kernels are launched asynchronously and would otherwise still be
    running when the elapsed time is read.
    """
    on_cuda = inputs.is_cuda
    if on_cuda:
        torch.cuda.synchronize()
    start_time = time.perf_counter()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 5 == 0:
            print(f" Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}")
    if on_cuda:
        torch.cuda.synchronize()
    return time.perf_counter() - start_time


print("\n1. GPU TRAINING")
print("-" * 80)
if torch.cuda.is_available():
    model_gpu = SimpleModel().cuda()  # Move to GPU
    optimizer_gpu = torch.optim.Adam(model_gpu.parameters())
    x_gpu = x.cuda()
    y_gpu = y.cuda()

    print(f"Device: {next(model_gpu.parameters()).device}")
    print(f"GPU Memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

    gpu_time = _timed_training(model_gpu, optimizer_gpu, x_gpu, y_gpu)
    print(f"\nGPU training time: {gpu_time:.2f} seconds")
else:
    # Without this guard the first .cuda() call aborts the whole script,
    # so a CPU-only machine would never reach the CPU measurement.
    gpu_time = None
    print("CUDA is not available - skipping GPU training")

print("\n2. CPU TRAINING")
print("-" * 80)
model_cpu = SimpleModel().cpu()  # Stay on CPU
optimizer_cpu = torch.optim.Adam(model_cpu.parameters())
x_cpu = x.cpu()
y_cpu = y.cpu()

print(f"Device: {next(model_cpu.parameters()).device}")

cpu_time = _timed_training(model_cpu, optimizer_cpu, x_cpu, y_cpu)
print(f"\nCPU training time: {cpu_time:.2f} seconds")

print("\n" + "=" * 80)
print("RESULTS")
print("=" * 80)
if gpu_time is not None:
    print(f"GPU time: {gpu_time:.2f} seconds")
print(f"CPU time: {cpu_time:.2f} seconds")
if gpu_time is not None:
    print(f"Speedup: {cpu_time / gpu_time:.1f}x faster on GPU")
print("=" * 80)
|
||||
207
cleanup_repo.ps1
207
cleanup_repo.ps1
|
|
@ -1,207 +0,0 @@
|
|||
# SmartCane Repository Cleanup Script
|
||||
# This script will delete unnecessary files and move experimental scripts
|
||||
# Review this script before running: .\cleanup_repo.ps1
|
||||
|
||||
Write-Host "🧹 SmartCane Repository Cleanup" -ForegroundColor Cyan
Write-Host "================================" -ForegroundColor Cyan
Write-Host ""

$deletedCount = 0
$movedCount = 0
$errors = @()

# ============================================================================
# HELPERS
# ============================================================================

# Deletes every path in $Paths that exists. A successful delete is reported
# and counted in $deletedCount; a failed delete is recorded in $errors instead
# of being reported as a success. Paths that do not exist are only reported.
function Remove-ListedFile {
    param($Paths)

    foreach ($file in $Paths) {
        if (Test-Path $file) {
            try {
                # -ErrorAction Stop turns a failed delete into a catchable error
                Remove-Item $file -Force -ErrorAction Stop
                Write-Host " ✓ Deleted: $file" -ForegroundColor Green
                $script:deletedCount++
            } catch {
                $script:errors += "Could not delete ${file}: $($_.Exception.Message)"
                Write-Host " ✗ Failed to delete: $file" -ForegroundColor Red
            }
        } else {
            Write-Host " ⚠ Not found: $file" -ForegroundColor DarkGray
        }
    }
}

# Moves every entry in $Items (hashtables with Source and Dest keys) whose
# Source exists. A successful move is reported and counted in $movedCount;
# a failed move is recorded in $errors.
function Move-ListedFile {
    param($Items)

    foreach ($file in $Items) {
        if (Test-Path $file.Source) {
            try {
                Move-Item $file.Source $file.Dest -Force -ErrorAction Stop
                Write-Host " ✓ Moved: $($file.Source) → $($file.Dest)" -ForegroundColor Green
                $script:movedCount++
            } catch {
                $script:errors += "Could not move $($file.Source): $($_.Exception.Message)"
                Write-Host " ✗ Failed to move: $($file.Source)" -ForegroundColor Red
            }
        } else {
            Write-Host " ⚠ Not found: $($file.Source)" -ForegroundColor DarkGray
        }
    }
}

# ============================================================================
# PART 1: DELETE FILES
# ============================================================================

Write-Host "📁 PART 1: Deleting files..." -ForegroundColor Yellow
Write-Host ""

# A) Test & Debug Scripts
$testFiles = @(
    "r_app/test_benchmarks.R",
    "r_app/test_harvest.R",
    "r_app/test_kpis_esa.R",
    "r_app/debug_kpis.R",
    "r_app/quick_layout_test.R",
    "r_app/run_minimal_test.R"
)

Write-Host "Deleting test and debug scripts..." -ForegroundColor Gray
Remove-ListedFile -Paths $testFiles

# B) Output Files (.Rout)
$routFiles = @(
    "r_app/02_ci_extraction.Rout",
    "r_app/03_interpolate_growth_model.Rout",
    "r_app/04_mosaic_creation.Rout"
)

Write-Host "`nDeleting .Rout files..." -ForegroundColor Gray
Remove-ListedFile -Paths $routFiles

# C) Temporary PDF Files
$pdfFiles = @(
    "Rplots.pdf",
    "r_app/Rplots.pdf"
)

Write-Host "`nDeleting temporary PDF files..." -ForegroundColor Gray
Remove-ListedFile -Paths $pdfFiles

# D) Old/Deprecated Scripts
$oldScripts = @(
    "r_app/ci_extraction.R",
    "r_app/interpolate_growth_model.R",
    "r_app/mosaic_creation.R",
    "r_app/installPackages.R",
    "r_app/packages.R",
    "generated_package_config.R"
)

Write-Host "`nDeleting old/deprecated scripts..." -ForegroundColor Gray
Remove-ListedFile -Paths $oldScripts

# E) Generated Word Documents
$wordDocs = @(
    "r_app/CI_report.docx",
    "r_app/CI_report2.docx",
    "r_app/CI_report_age_filtered.docx",
    "r_app/CI_report_last_week.docx",
    "r_app/CI_report_week38_corrected.docx",
    "r_app/CI_report_with_kpis_aura.docx",
    "r_app/CI_report_with_kpis_esa.docx",
    "r_app/05_CI_report_dashboard_planet.docx",
    "r_app/10_CI_report_with_kpis_simple.docx",
    "r_app/script5_test.docx",
    "r_app/test_kpi_grid.docx",
    "r_app/output/aura/crop_analysis_AURA_w36vs35_20250916_1631.docx",
    "r_app/output/reports/CI_report_with_kpis_simple_test.docx",
    "r_app/output/CI_report_2x3_layout.docx",
    "r_app/output/CI_report_consolidated.docx",
    "r_app/output/CI_report_layout_test.docx",
    "r_app/output/test_clean.docx",
    "r_app/output/test_grid.docx",
    "r_app/output/test_kables.docx",
    "r_app/output/test_merged.docx"
)

Write-Host "`nDeleting generated Word documents (keeping word-styles-reference-var1.docx)..." -ForegroundColor Gray
Remove-ListedFile -Paths $wordDocs

# ============================================================================
# PART 2: MOVE FILES TO EXPERIMENTS
# ============================================================================

Write-Host "`n`n📁 PART 2: Moving files to experiments..." -ForegroundColor Yellow
Write-Host ""

# Create destination directories
$destDirs = @(
    "r_app/experiments/reports",
    "r_app/experiments/legacy_package_management"
)

foreach ($dir in $destDirs) {
    if (!(Test-Path $dir)) {
        New-Item -ItemType Directory -Path $dir -Force | Out-Null
        Write-Host " Created directory: $dir" -ForegroundColor Cyan
    }
}

# Move experimental Rmd files
$rmdFiles = @(
    @{Source="r_app/CI_report_dashboard_planet.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/CI_report_dashboard_planet_enhanced.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/CI_report_executive_summary.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/simple_kpi_report.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/test_kpi_grid.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/test_minimal.Rmd"; Dest="r_app/experiments/reports/"}
)

Write-Host "Moving experimental Rmd files..." -ForegroundColor Gray
Move-ListedFile -Items $rmdFiles

# Move legacy package management scripts
$legacyFiles = @(
    @{Source="r_app/extract_current_versions.R"; Dest="r_app/experiments/legacy_package_management/"},
    @{Source="r_app/package_manager.R"; Dest="r_app/experiments/legacy_package_management/"}
)

Write-Host "`nMoving legacy package management scripts..." -ForegroundColor Gray
Move-ListedFile -Items $legacyFiles

# ============================================================================
# SUMMARY
# ============================================================================

Write-Host "`n`n📊 CLEANUP SUMMARY" -ForegroundColor Cyan
Write-Host "==================" -ForegroundColor Cyan
Write-Host "Files deleted: $deletedCount" -ForegroundColor Green
Write-Host "Files moved: $movedCount" -ForegroundColor Green

# $errors is filled by the two helpers whenever a delete or move fails
if ($errors.Count -gt 0) {
    Write-Host "`n⚠️ Errors encountered: $($errors.Count)" -ForegroundColor Red
    foreach ($err in $errors) {
        Write-Host " $err" -ForegroundColor Red
    }
}

Write-Host "`n✅ Cleanup completed!" -ForegroundColor Green
Write-Host "`nNext step: Update .gitignore (see instructions)" -ForegroundColor Yellow
|
||||
|
|
@ -1,177 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
CONVERT_ANGATA_HARVEST.PY
|
||||
=========================
|
||||
Converts Angata harvest data from its received format to the standardized SmartCane format.
|
||||
|
||||
Input format (as received from Angata):
|
||||
Contract No | Field No | dop/doh
|
||||
0001 | 1 | 01/06/2023
|
||||
|
||||
Output format (SmartCane standard, matching Aura):
|
||||
field | sub_field | year | season_start | season_end | age | sub_area | tonnage_ha
|
||||
|
||||
The script:
|
||||
1. Reads Angata harvest.xlsx
|
||||
2. Extracts field numbers and dates
|
||||
3. Uses each field number, converted to text, as the field name (e.g., "1", "2", "10")
|
||||
4. Extracts year from date
|
||||
5. Uses dop/doh as season_start (other fields left as NaN for now)
|
||||
6. Writes output to harvest.xlsx in SmartCane format
|
||||
|
||||
Usage:
|
||||
python convert_angata_harvest.py
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _format_field_no(value):
    """Return a field number as text, writing whole-number floats without '.0'."""
    # A numeric column that contained blank cells is read as float, so field 1
    # arrives as 1.0; it must still be written as "1".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def convert_angata_harvest():
    """Convert Angata harvest data to SmartCane format.

    Reads every sheet of ``harvest.xlsx`` in the Angata data directory, keeps
    the sheets that have a 'Field No' column and a 'dop/doh' (or 'doh/dop')
    date column, and combines them. Dates are parsed as DD/MM/YYYY; a value
    that cannot be parsed is reported and stored as NaT with a missing year.

    The result is written back to the same ``harvest.xlsx`` path as a single
    'Harvest' sheet, replacing the input workbook.

    Returns:
        pandas.DataFrame with the columns field, sub_field, year,
        season_start, season_end, age, sub_area, tonnage_ha.

    Raises:
        ValueError: if no sheet contains usable data, or a required column is
            missing after the sheets are combined.
    """
    # Define paths
    angata_dir = Path("laravel_app/storage/app/angata/Data")
    input_file = angata_dir / "harvest.xlsx"
    # Same path as the input: the converted data replaces the original workbook
    output_file = angata_dir / "harvest.xlsx"

    # Read all sheets from input file
    print(f"Reading Angata harvest data from: {input_file}")

    # Collect all data from all sheets
    all_data = []

    # The context manager closes the workbook before it is overwritten below
    with pd.ExcelFile(input_file) as xls:
        print(f"Sheet names found: {xls.sheet_names}")

        for sheet_name in xls.sheet_names:
            print(f"\nProcessing sheet: {sheet_name}")
            df = pd.read_excel(xls, sheet_name=sheet_name)

            # Remove any completely empty rows
            df = df.dropna(how='all')

            # Skip if no data
            if len(df) == 0:
                print(f" Sheet {sheet_name} is empty, skipping")
                continue

            # Check if this sheet has the required Field No column
            if 'Field No' not in df.columns:
                print(f" Sheet {sheet_name} does not have 'Field No' column, skipping")
                continue

            # Check for date column (can be dop/doh or doh/dop)
            if 'dop/doh' in df.columns:
                date_col = 'dop/doh'
            elif 'doh/dop' in df.columns:
                date_col = 'doh/dop'
            else:
                print(f" Sheet {sheet_name} does not have date column (dop/doh or doh/dop), skipping")
                continue

            # Standardize date column name to 'dop/doh' for consistency
            df = df.rename(columns={date_col: 'dop/doh'})

            # Drop rows without a field number
            df = df[pd.notna(df['Field No'])]

            print(f" Loaded {len(df)} records from {sheet_name}")
            all_data.append(df)

    # Combine all sheets
    if not all_data:
        raise ValueError("No valid data found in any sheet")

    print(f"\nCombining data from {len(all_data)} sheets...")
    df = pd.concat(all_data, ignore_index=True)
    df = df.dropna(how='all')  # Remove empty rows after concat
    df = df[pd.notna(df['Field No'])]  # Ensure no NaN field numbers

    print(f"Total records after combining: {len(df)}")

    # Validate input columns
    required_cols = ['Field No', 'dop/doh']
    for col in required_cols:
        if col not in df.columns:
            raise ValueError(f"Missing required column: {col}")

    # Create conversion dataframe on the same row index as the combined data
    converted = pd.DataFrame(index=df.index)

    # Field name = field number as string (e.g., "1", "2", "10")
    converted['field'] = [_format_field_no(value) for value in df['Field No']]

    # Sub-field is same as field
    converted['sub_field'] = converted['field']

    # Parse dop/doh dates - format is DD/MM/YYYY
    print("\nParsing dates...")
    dates = []
    years = []
    for idx, date_str in enumerate(df['dop/doh']):
        try:
            # Handle NaN/null values
            if pd.isna(date_str):
                dates.append(pd.NaT)
                years.append(None)
            else:
                # Parse date string in DD/MM/YYYY format
                date_obj = pd.to_datetime(date_str, format='%d/%m/%Y')
                dates.append(date_obj)
                years.append(int(date_obj.year))
        except Exception as e:
            # One bad date must not abort the conversion: report it, keep the row
            print(f"Warning: Could not parse date at row {idx}: {date_str} - {e}")
            dates.append(pd.NaT)
            years.append(None)

    # Ensure lists match DataFrame length (handle edge cases)
    assert len(dates) == len(df), f"Date list length {len(dates)} != DataFrame length {len(df)}"
    assert len(years) == len(df), f"Years list length {len(years)} != DataFrame length {len(df)}"

    converted['season_start'] = dates
    # Nullable integer type so rows without a date keep a missing year
    converted['year'] = pd.array(years, dtype='Int64')

    # Other fields (not provided in Angata data)
    # season_end: empty string (to be filled in by other scripts)
    converted['season_end'] = ""
    # age, sub_area, tonnage_ha are left empty
    converted['age'] = None
    converted['sub_area'] = None
    converted['tonnage_ha'] = None

    # Reorder columns to match Aura format
    converted = converted[['field', 'sub_field', 'year', 'season_start', 'season_end', 'age', 'sub_area', 'tonnage_ha']]

    # Display summary
    print("\nConversion summary:")
    print(f" Total records: {len(converted)}")
    print(f" Date range: {converted['season_start'].min()} to {converted['season_start'].max()}")
    print(f" Years: {sorted(converted['year'].dropna().unique())}")
    print("\nFirst 10 rows:")
    print(converted.head(10))

    # Save to Excel
    print(f"\nSaving converted data to: {output_file}")
    converted.to_excel(output_file, index=False, sheet_name='Harvest')
    print("Conversion complete!")

    return converted
|
||||
|
||||
|
||||
if __name__ == "__main__":
    try:
        result = convert_angata_harvest()
        print("\nSuccess! Angata harvest data has been converted to SmartCane format.")
    except Exception as e:
        print(f"\nError during conversion: {e}")
        import traceback
        traceback.print_exc()
        # Exit with a non-zero status so a calling script or pipeline can
        # tell that the conversion failed.
        raise SystemExit(1)
|
||||
|
|
@ -1,212 +0,0 @@
|
|||
# SmartCane Data Validation Tool
|
||||
|
||||
A standalone, client-side data validation tool for validating Excel harvest data and GeoJSON field boundaries before uploading to the SmartCane system.
|
||||
|
||||
## Features
|
||||
|
||||
### 🚦 Traffic Light System
|
||||
- **🟢 GREEN**: All checks passed
|
||||
- **🟡 YELLOW**: Warnings detected (non-critical issues)
|
||||
- **🔴 RED**: Errors detected (blocking issues)
|
||||
|
||||
### ✅ Validation Checks
|
||||
|
||||
1. **Excel Column Validation**
|
||||
- Checks for all 8 required columns: `field`, `sub_field`, `year`, `season_start`, `season_end`, `age`, `sub_area`, `tonnage_ha`
|
||||
- Identifies extra columns that will be ignored
|
||||
- Shows missing columns that must be added
|
||||
|
||||
2. **GeoJSON Properties Validation**
|
||||
- Checks all features have required properties: `field`, `sub_field`
|
||||
- Identifies redundant properties that will be ignored
|
||||
|
||||
3. **Coordinate Reference System (CRS)**
|
||||
- Validates correct CRS: **EPSG:32736 (UTM Zone 36S)**
|
||||
- This CRS was validated from your Angata farm coordinates
|
||||
- Explains why this specific CRS is required
|
||||
|
||||
4. **Field Name Matching**
|
||||
- Compares field names between Excel and GeoJSON
|
||||
- Shows which fields exist in only one dataset
|
||||
- Highlights misspellings or missing fields
|
||||
- Provides complete matching summary table
|
||||
|
||||
5. **Data Type & Content Validation**
|
||||
- Checks column data types:
|
||||
- `year`: Must be integer
|
||||
- `season_start`, `season_end`: Must be valid dates
|
||||
- `age`, `sub_area`, `tonnage_ha`: Must be numeric (decimal)
|
||||
- Identifies rows with missing `season_start` dates
|
||||
- Flags invalid date formats and numeric values
|
||||
|
||||
## File Requirements
|
||||
|
||||
### Excel File (harvest.xlsx)
|
||||
```
|
||||
| field | sub_field | year | season_start | season_end | age | sub_area | tonnage_ha |
|
||||
|----------|------------------|------|--------------|------------|-----|----------|-----------|
|
||||
| kowawa | kowawa | 2023 | 2023-01-15 | 2024-01-14 | 1.5 | 45 | 125.5 |
|
||||
| Tamu | Tamu Upper | 2023 | 2023-02-01 | 2024-01-31 | 1.0 | 30 | 98.0 |
|
||||
```
|
||||
|
||||
**Data Types:**
|
||||
- `field`, `sub_field`: Text (can be numeric as text)
|
||||
- `year`: Integer
|
||||
- `season_start`, `season_end`: Date (YYYY-MM-DD format)
|
||||
- `age`, `sub_area`, `tonnage_ha`: Decimal/Float
|
||||
|
||||
**Extra columns** are allowed but will not be processed.
|
||||
|
||||
### GeoJSON File (pivot.geojson)
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": {
|
||||
"type": "name",
|
||||
"properties": {
|
||||
"name": "urn:ogc:def:crs:EPSG::32736"
|
||||
}
|
||||
},
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": {
|
||||
"field": "kowawa",
|
||||
"sub_field": "kowawa"
|
||||
},
|
||||
"geometry": {
|
||||
"type": "MultiPolygon",
|
||||
"coordinates": [...]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Required Properties:**
|
||||
- `field`: Field identifier (must match Excel)
|
||||
- `sub_field`: Sub-field identifier (must match Excel)
|
||||
|
||||
**Optional Properties:**
|
||||
- `STATUS`, `name`, `age`, etc. - These are allowed but not required
|
||||
|
||||
**CRS:**
|
||||
- Must be EPSG:32736 (UTM Zone 36S)
|
||||
- This was determined from analyzing your Angata farm coordinates
|
||||
|
||||
## Deployment
|
||||
|
||||
### Local Use (Recommended for Security)
|
||||
1. Download the `data_validation_tool` folder
|
||||
2. Open `index.html` in a web browser
|
||||
3. Files are processed entirely client-side - no data is sent to servers
|
||||
|
||||
### Netlify Deployment
|
||||
1. Connect to your GitHub repository
|
||||
2. Leave the build command empty (the tool is static HTML, so no build step is needed)
|
||||
3. Set publish directory: `data_validation_tool`
|
||||
4. Deploy
|
||||
|
||||
Or use Netlify CLI:
|
||||
```bash
|
||||
npm install -g netlify-cli
|
||||
netlify deploy --dir data_validation_tool
|
||||
```
|
||||
|
||||
### Manual Testing
|
||||
1. Use the provided sample files:
|
||||
- Excel: `laravel_app/storage/app/aura/Data/harvest.xlsx`
|
||||
- GeoJSON: `laravel_app/storage/app/aura/Data/pivot.geojson`
|
||||
2. Open `index.html`
|
||||
3. Upload both files
|
||||
4. Review validation results
|
||||
|
||||
## Technical Details
|
||||
|
||||
### Browser Requirements
|
||||
- Modern browser with ES6 support (Chrome, Firefox, Safari, Edge)
|
||||
- Must support FileReader API and JSON parsing
|
||||
- Requires XLSX library for Excel parsing
|
||||
|
||||
### Dependencies
|
||||
- **XLSX.js**: For reading Excel files (loaded via CDN in index.html)
|
||||
|
||||
### What Happens When You Upload
|
||||
1. File is read into memory (client-side only)
|
||||
2. Excel: Parsed using XLSX library into JSON
|
||||
3. GeoJSON: Parsed directly as JSON
|
||||
4. All validation runs in your browser
|
||||
5. Results displayed locally
|
||||
6. **No files are sent to any server**
|
||||
|
||||
## Validation Rules
|
||||
|
||||
### Traffic Light Logic
|
||||
|
||||
**All GREEN (✓ Passed)**
|
||||
- All required columns/properties present
|
||||
- Correct CRS
|
||||
- All field names match
|
||||
- All data types valid
|
||||
|
||||
**YELLOW (⚠️ Warnings)**
|
||||
- Extra columns detected (will be ignored)
|
||||
- Extra properties detected (will be ignored)
|
||||
- Missing dates in some fields
|
||||
- Data type issues in specific rows
|
||||
|
||||
**RED (✗ Failed)**
|
||||
- Missing required columns/properties
|
||||
- Wrong CRS
|
||||
- Field names mismatch between files
|
||||
- Fundamental data structure issues
|
||||
|
||||
### CRS Explanation
|
||||
|
||||
From your project's geospatial analysis:
|
||||
- **Original issue**: Angata farm GeoJSON had coordinates in UTM Zone 37S but marked as WGS84
|
||||
- **Root cause**: UTM Zone mismatch - farm is actually in UTM Zone 36S
|
||||
- **Solution**: Reproject to EPSG:32736 (UTM Zone 36S)
|
||||
- **Why**: This aligns with actual Angata farm coordinates (longitude ~34.4°E)
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Failed to read Excel file"
|
||||
- Ensure file is `.xlsx` format
|
||||
- File should not be open in Excel while uploading
|
||||
- Try saving as Excel 2007+ format
|
||||
|
||||
### "Failed to parse GeoJSON"
|
||||
- Ensure file is valid JSON
|
||||
- Check for syntax errors (extra commas, missing brackets)
|
||||
- Use online JSON validator at jsonlint.com
|
||||
|
||||
### "Wrong CRS detected"
|
||||
- GeoJSON must explicitly state CRS as EPSG:32736
|
||||
- Example: `"name": "urn:ogc:def:crs:EPSG::32736"`
|
||||
- Reproject in QGIS or R if needed
|
||||
|
||||
### "Field names don't match"
|
||||
- Check for typos and capitalization differences
|
||||
- Remove any spaces at the beginning or end of field names
|
||||
- Use field names exactly as they appear in both files
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- [ ] Download validation report as PDF
|
||||
- [ ] Batch upload multiple Excel/GeoJSON pairs
|
||||
- [ ] Auto-detect and suggest field mappings
|
||||
- [ ] Geometry validity checks (self-intersecting polygons)
|
||||
- [ ] Area comparison between Excel and GeoJSON
|
||||
- [ ] Export cleaned/standardized files
|
||||
|
||||
## Support
|
||||
|
||||
For questions about data validation requirements, contact the SmartCane team.
|
||||
|
||||
---
|
||||
|
||||
**Tool Version**: 1.0
|
||||
**Last Updated**: December 2025
|
||||
**CRS Reference**: EPSG:32736 (UTM Zone 36S)
|
||||
|
|
@ -1,396 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>SmartCane Data Validation Tool</title>
|
||||
<style>
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
min-height: 100vh;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
header {
|
||||
background: white;
|
||||
padding: 30px;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
h1 {
|
||||
color: #333;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.subtitle {
|
||||
color: #666;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.upload-section {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.upload-card {
|
||||
background: white;
|
||||
padding: 30px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.upload-card h2 {
|
||||
font-size: 18px;
|
||||
color: #333;
|
||||
margin-bottom: 15px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.file-icon {
|
||||
font-size: 24px;
|
||||
}
|
||||
|
||||
.file-input-wrapper {
|
||||
position: relative;
|
||||
display: inline-block;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.file-input-label {
|
||||
display: block;
|
||||
padding: 20px;
|
||||
border: 2px dashed #667eea;
|
||||
border-radius: 6px;
|
||||
text-align: center;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s;
|
||||
background: #f8f9ff;
|
||||
}
|
||||
|
||||
.file-input-label:hover {
|
||||
border-color: #764ba2;
|
||||
background: #f0f1ff;
|
||||
}
|
||||
|
||||
.file-input-wrapper input[type="file"] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.file-name {
|
||||
margin-top: 10px;
|
||||
font-size: 14px;
|
||||
color: #667eea;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.results-section {
|
||||
background: white;
|
||||
padding: 30px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
display: none;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
.results-section.show {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.results-section h2 {
|
||||
color: #333;
|
||||
margin-bottom: 25px;
|
||||
padding-bottom: 15px;
|
||||
border-bottom: 3px solid #667eea;
|
||||
}
|
||||
|
||||
.traffic-light {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
||||
gap: 15px;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
|
||||
.check-item {
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 12px;
|
||||
font-weight: 500;
|
||||
border-left: 4px solid;
|
||||
}
|
||||
|
||||
.check-item.pass {
|
||||
background: #d4edda;
|
||||
color: #155724;
|
||||
border-left-color: #28a745;
|
||||
}
|
||||
|
||||
.check-item.warning {
|
||||
background: #fff3cd;
|
||||
color: #856404;
|
||||
border-left-color: #ffc107;
|
||||
}
|
||||
|
||||
.check-item.fail {
|
||||
background: #f8d7da;
|
||||
color: #721c24;
|
||||
border-left-color: #dc3545;
|
||||
}
|
||||
|
||||
.light {
|
||||
font-size: 24px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.light.green::before { content: "🟢"; }
|
||||
.light.yellow::before { content: "🟡"; }
|
||||
.light.red::before { content: "🔴"; }
|
||||
|
||||
.details-section {
|
||||
margin-top: 30px;
|
||||
border-top: 1px solid #eee;
|
||||
padding-top: 20px;
|
||||
}
|
||||
|
||||
.details-section h3 {
|
||||
font-size: 16px;
|
||||
color: #333;
|
||||
margin-bottom: 15px;
|
||||
padding-bottom: 10px;
|
||||
border-bottom: 2px solid #667eea;
|
||||
margin-top: 25px;
|
||||
}
|
||||
|
||||
.details-section > div:first-child h3 {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
.message-box {
|
||||
padding: 15px;
|
||||
margin-bottom: 15px;
|
||||
border-radius: 6px;
|
||||
font-size: 14px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.message-box.error {
|
||||
background: #f8d7da;
|
||||
color: #721c24;
|
||||
border-left: 4px solid #dc3545;
|
||||
}
|
||||
|
||||
.message-box.warning {
|
||||
background: #fff3cd;
|
||||
color: #856404;
|
||||
border-left: 4px solid #ffc107;
|
||||
}
|
||||
|
||||
.message-box.info {
|
||||
background: #d1ecf1;
|
||||
color: #0c5460;
|
||||
border-left: 4px solid #17a2b8;
|
||||
}
|
||||
|
||||
.message-box.success {
|
||||
background: #d4edda;
|
||||
color: #155724;
|
||||
border-left: 4px solid #28a745;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-top: 15px;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
th {
|
||||
background: #667eea;
|
||||
color: white;
|
||||
padding: 12px;
|
||||
text-align: left;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
td {
|
||||
padding: 10px 12px;
|
||||
border-bottom: 1px solid #eee;
|
||||
}
|
||||
|
||||
tr:hover {
|
||||
background: #f8f9ff;
|
||||
}
|
||||
|
||||
.match {
|
||||
color: #28a745;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.mismatch {
|
||||
color: #dc3545;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.missing {
|
||||
color: #ffc107;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.field-list {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
|
||||
gap: 10px;
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
.field-badge {
|
||||
background: #e9ecef;
|
||||
padding: 8px 12px;
|
||||
border-radius: 4px;
|
||||
font-size: 13px;
|
||||
border-left: 3px solid;
|
||||
}
|
||||
|
||||
.field-badge.missing {
|
||||
background: #fff3cd;
|
||||
border-left-color: #ffc107;
|
||||
color: #856404;
|
||||
}
|
||||
|
||||
.field-badge.extra {
|
||||
background: #d1ecf1;
|
||||
border-left-color: #17a2b8;
|
||||
color: #0c5460;
|
||||
}
|
||||
|
||||
.validation-row {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
||||
gap: 10px;
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
.validation-item {
|
||||
background: #f8f9ff;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
font-size: 13px;
|
||||
border-left: 3px solid;
|
||||
}
|
||||
|
||||
.validation-item.valid {
|
||||
border-left-color: #28a745;
|
||||
}
|
||||
|
||||
.validation-item.invalid {
|
||||
border-left-color: #dc3545;
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.upload-section {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.traffic-light {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
footer {
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
margin-top: 20px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
text-align: center;
|
||||
font-size: 13px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
footer a {
|
||||
color: #667eea;
|
||||
text-decoration: none;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
footer a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<h1>🌾 SmartCane Data Validation Tool</h1>
|
||||
<p class="subtitle">Validate your Excel and GeoJSON files before uploading to the system</p>
|
||||
</header>
|
||||
|
||||
<div class="upload-section">
|
||||
<div class="upload-card">
|
||||
<h2><span class="file-icon">📊</span>Excel File (Harvest Data)</h2>
|
||||
<p style="font-size: 13px; color: #666; margin-bottom: 15px;">Required columns: field, sub_field, year, season_start, season_end, age, sub_area, tonnage_ha</p>
|
||||
<div class="file-input-wrapper" id="excelDropZone">
|
||||
<label class="file-input-label" for="excelFile">
|
||||
<div>Drop your Excel file here<br><small>or click to browse</small></div>
|
||||
<div class="file-name" id="excelFileName"></div>
|
||||
</label>
|
||||
<input type="file" id="excelFile" accept=".xlsx,.xls" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="upload-card">
|
||||
<h2><span class="file-icon">🗺️</span>GeoJSON File (Field Boundaries)</h2>
|
||||
<p style="font-size: 13px; color: #666; margin-bottom: 15px;">Required properties: field, sub_field</p>
|
||||
<div class="file-input-wrapper" id="geojsonDropZone">
|
||||
<label class="file-input-label" for="geojsonFile">
|
||||
<div>Drop your GeoJSON file here<br><small>or click to browse</small></div>
|
||||
<div class="file-name" id="geojsonFileName"></div>
|
||||
</label>
|
||||
<input type="file" id="geojsonFile" accept=".geojson,.json" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="text-align: center; margin-bottom: 20px;">
|
||||
<button id="checkButton" style="padding: 12px 40px; font-size: 16px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border: none; border-radius: 6px; cursor: pointer; font-weight: 600; display: none;">
|
||||
✓ Check Files
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="results-section" id="resultsSection">
|
||||
<h2 style="margin-bottom: 20px; color: #333;">Validation Results</h2>
|
||||
|
||||
<div class="traffic-light" id="trafficLight"></div>
|
||||
|
||||
<div class="details-section" id="detailsSection"></div>
|
||||
</div>
|
||||
|
||||
<footer>
|
||||
SmartCane Data Validation Tool | Learn more at <a href="https://www.smartcane.ag" target="_blank">www.smartcane.ag</a>
|
||||
</footer>
|
||||
</div>
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js"></script>
|
||||
<script src="validator.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,698 +0,0 @@
|
|||
// Validation rules shared by every check in this file.
const CONFIG = {
    // Columns the harvest spreadsheet must contain.
    REQUIRED_EXCEL_COLUMNS: ['field', 'sub_field', 'year', 'season_start', 'season_end', 'tonnage_ha'],
    // Accepted but not required: age is calculated in script, sub_area is optional.
    OPTIONAL_EXCEL_COLUMNS: ['age', 'sub_area'],
    // Properties every GeoJSON feature must carry.
    REQUIRED_GEOJSON_PROPERTIES: ['field', 'sub_field'],
    // UTM zone 36S: the CRS the boundary file is expected to declare.
    VALID_CRS: 'EPSG:32736',
    CRS_DESCRIPTION: 'EPSG:32736 (UTM Zone 36S) - This is the correct CRS learned from geospatial analysis of Angata farm coordinates'
};

// Parsed file contents (null until a file has been read).
let excelData = null;
let geojsonData = null;

// Load flags; the "Check Files" button is shown only when both are true.
let excelLoaded = false;
let geojsonLoaded = false;

// Wire the two file inputs and the check button to their handlers.
[
    ['excelFile', 'change', handleExcelFile],
    ['geojsonFile', 'change', handleGeojsonFile],
    ['checkButton', 'click', validateData]
].forEach(([elementId, eventName, handler]) => {
    document.getElementById(elementId).addEventListener(eventName, handler);
});
|
||||
|
||||
/**
 * Show the "Check Files" button only while both the Excel and the GeoJSON
 * file are flagged as loaded; hide it otherwise.
 */
function updateCheckButton() {
    const bothReady = excelLoaded && geojsonLoaded;
    document.getElementById('checkButton').style.display = bothReady ? 'inline-block' : 'none';
}
|
||||
|
||||
/**
 * Attach drag-and-drop behaviour to a drop zone.
 *
 * While a file is dragged over the zone it is highlighted; on drop the
 * dropped files are copied onto the matching <input type="file"> and the
 * supplied handler is invoked with a change-event-shaped object.
 *
 * @param {HTMLElement} dropZone  Element that receives the drag events.
 * @param {string} inputId        Id of the file input kept in sync with the drop.
 * @param {function} onFile       Handler called as onFile({ target: { files } }).
 */
function setupDropZone(dropZone, inputId, onFile) {
    // Stop the browser from navigating to / opening the dragged file.
    const swallow = (e) => {
        e.preventDefault();
        e.stopPropagation();
    };
    const setHighlight = (active) => {
        dropZone.style.backgroundColor = active ? '#f0f1ff' : 'transparent';
    };

    dropZone.addEventListener('dragover', (e) => {
        swallow(e);
        setHighlight(true);
    });

    dropZone.addEventListener('dragleave', (e) => {
        swallow(e);
        setHighlight(false);
    });

    dropZone.addEventListener('drop', (e) => {
        swallow(e);
        setHighlight(false);
        // dataTransfer can be absent on synthetic events; treat that as "nothing dropped".
        const files = e.dataTransfer ? e.dataTransfer.files : null;
        if (files && files.length > 0) {
            document.getElementById(inputId).files = files;
            onFile({ target: { files: files } });
        }
    });
}

// Drag and drop handlers for Excel
const excelDropZone = document.getElementById('excelDropZone');
setupDropZone(excelDropZone, 'excelFile', handleExcelFile);

// Drag and drop handlers for GeoJSON
const geojsonDropZone = document.getElementById('geojsonDropZone');
setupDropZone(geojsonDropZone, 'geojsonFile', handleGeojsonFile);
|
||||
|
||||
/**
 * Read the selected Excel file and store the rows of its first sheet in
 * `excelData`.
 *
 * Any previously loaded workbook is discarded as soon as a new file is
 * chosen, so a failed load can never leave stale rows behind for a later
 * validation run.
 *
 * @param {{target: {files: FileList}}} e  Change event (or an object shaped like one).
 */
function handleExcelFile(e) {
    const file = e.target.files[0];
    if (!file) return;

    const nameLabel = document.getElementById('excelFileName');

    // Invalidate the previous result until the new file has parsed successfully.
    excelData = null;
    excelLoaded = false;
    updateCheckButton();

    nameLabel.textContent = `✓ ${file.name}`;

    const fail = (text) => {
        excelData = null;
        excelLoaded = false;
        nameLabel.textContent = text;
        updateCheckButton();
    };

    const reader = new FileReader();
    reader.onload = (event) => {
        try {
            const data = new Uint8Array(event.target.result);
            const workbook = XLSX.read(data, { type: 'array' });
            const firstSheetName = workbook.SheetNames[0];
            if (!firstSheetName) {
                throw new Error('Workbook contains no sheets');
            }
            excelData = XLSX.utils.sheet_to_json(workbook.Sheets[firstSheetName]);
            excelLoaded = true;
            updateCheckButton();
        } catch (error) {
            fail(`✗ Error: ${error.message}`);
        }
    };
    reader.onerror = () => fail(`✗ Failed to read file`);
    reader.readAsArrayBuffer(file);
}
|
||||
|
||||
/**
 * Read the selected GeoJSON file and store the parsed object in
 * `geojsonData`.
 *
 * Any previously loaded document is discarded as soon as a new file is
 * chosen, and JSON that parses to something other than an object
 * (e.g. `null` or a bare number) is rejected instead of being marked loaded.
 *
 * @param {{target: {files: FileList}}} e  Change event (or an object shaped like one).
 */
function handleGeojsonFile(e) {
    const file = e.target.files[0];
    if (!file) return;

    const nameLabel = document.getElementById('geojsonFileName');

    // Invalidate the previous result until the new file has parsed successfully.
    geojsonData = null;
    geojsonLoaded = false;
    updateCheckButton();

    nameLabel.textContent = `✓ ${file.name}`;

    const fail = (text) => {
        geojsonData = null;
        geojsonLoaded = false;
        nameLabel.textContent = text;
        updateCheckButton();
    };

    const reader = new FileReader();
    reader.onload = (event) => {
        try {
            const parsed = JSON.parse(event.target.result);
            if (parsed === null || typeof parsed !== 'object') {
                throw new Error('File does not contain a JSON object');
            }
            geojsonData = parsed;
            geojsonLoaded = true;
            updateCheckButton();
        } catch (error) {
            fail(`✗ Invalid JSON: ${error.message}`);
        }
    };
    reader.onerror = () => fail(`✗ Failed to read file`);
    reader.readAsText(file);
}
|
||||
|
||||
/**
 * Run every validation check against the loaded files and render the
 * outcome. Does nothing (apart from an alert) if either file is missing.
 */
function validateData() {
    if (!excelData || !geojsonData) {
        alert('Please upload both Excel and GeoJSON files before checking.');
        return;
    }

    // Executed in this order; the order also determines the display order.
    const validators = [
        validateExcelColumns,       // 1. Excel column validation
        validateGeojsonProperties,  // 2. GeoJSON properties validation
        validateCRS,                // 3. CRS validation
        validateFieldMatching,      // 4. Field name matching
        validateDataTypes           // 5. Data type and content validation
    ];

    const results = { checks: [], details: [] };
    for (const runCheck of validators) {
        const outcome = runCheck();
        results.checks.push(outcome);
        results.details.push(outcome.details);
    }

    displayResults(results);
}
|
||||
|
||||
/**
 * Check that the spreadsheet contains every required column.
 *
 * Column names are collected from ALL rows rather than only the first one:
 * rows produced by the sheet parser carry no key for an empty cell, so a
 * blank cell in the first data row must not make the column look missing.
 *
 * @returns {{name: string, status: 'pass'|'warning'|'fail', message: string, details: object}}
 *   'fail' when a required column is absent, 'warning' when only unknown
 *   extra columns are present, 'pass' otherwise.
 */
function validateExcelColumns() {
    // Union of keys over all rows, in first-seen order.
    const seen = new Set();
    (excelData || []).forEach(row => {
        Object.keys(row || {}).forEach(key => seen.add(key));
    });
    const excelColumns = Array.from(seen);

    const missing = CONFIG.REQUIRED_EXCEL_COLUMNS.filter(col => !seen.has(col));
    const hasOptional = CONFIG.OPTIONAL_EXCEL_COLUMNS.filter(col => seen.has(col));
    const notRequired = excelColumns.filter(col =>
        !CONFIG.REQUIRED_EXCEL_COLUMNS.includes(col) && !CONFIG.OPTIONAL_EXCEL_COLUMNS.includes(col));

    let status = 'pass';
    let message = 'All required columns present';

    if (missing.length > 0) {
        status = 'fail';
        message = `Missing required columns: ${missing.join(', ')}`;
    } else if (notRequired.length > 0) {
        status = 'warning';
        message = `Extra columns detected (will be ignored): ${notRequired.join(', ')}`;
    }

    return {
        name: 'Excel Columns',
        status: status,
        message: message,
        details: {
            title: 'Excel Column Validation',
            type: 'columns',
            required: CONFIG.REQUIRED_EXCEL_COLUMNS,
            optional: CONFIG.OPTIONAL_EXCEL_COLUMNS,
            found: excelColumns,
            missing: missing,
            hasOptional: hasOptional,
            extra: notRequired
        }
    };
}
|
||||
|
||||
/**
 * Check that every GeoJSON feature carries all required properties.
 *
 * A property counts as missing only when it is undefined, null or the empty
 * string, so legitimate falsy values such as the number 0 are accepted.
 * Null feature entries and features without a `properties` object are
 * reported as missing every required property instead of throwing.
 *
 * @returns {{name: string, status: 'pass'|'warning'|'fail', message: string, details: object}}
 */
function validateGeojsonProperties() {
    if (!Array.isArray(geojsonData.features) || geojsonData.features.length === 0) {
        return {
            name: 'GeoJSON Properties',
            status: 'fail',
            message: 'GeoJSON has no features',
            details: {
                title: 'GeoJSON Property Validation',
                type: 'properties',
                error: 'No features found in GeoJSON'
            }
        };
    }

    const isMissing = (value) => value === undefined || value === null || value === '';

    const allProperties = new Set();
    const missingInFeatures = [];

    geojsonData.features.forEach((feature, idx) => {
        const props = (feature && feature.properties) || {};
        Object.keys(props).forEach(p => allProperties.add(p));

        CONFIG.REQUIRED_GEOJSON_PROPERTIES.forEach(reqProp => {
            if (isMissing(props[reqProp])) {
                missingInFeatures.push({
                    feature: idx, // zero-based position in the features array
                    property: reqProp,
                    field: isMissing(props.field) ? 'Unknown' : props.field
                });
            }
        });
    });

    const extra = Array.from(allProperties).filter(p => !CONFIG.REQUIRED_GEOJSON_PROPERTIES.includes(p));

    let status = 'pass';
    let message = 'All required properties present in all features';

    if (missingInFeatures.length > 0) {
        status = 'fail';
        message = `Missing properties in ${missingInFeatures.length} feature(s)`;
    } else if (extra.length > 0) {
        status = 'warning';
        message = `Extra properties detected: ${extra.join(', ')}`;
    }

    return {
        name: 'GeoJSON Properties',
        status: status,
        message: message,
        details: {
            title: 'GeoJSON Property Validation',
            type: 'properties',
            required: CONFIG.REQUIRED_GEOJSON_PROPERTIES,
            found: Array.from(allProperties),
            extra: extra,
            missingInFeatures: missingInFeatures
        }
    };
}
|
||||
|
||||
/**
 * Check that the GeoJSON declares the expected coordinate reference system.
 *
 * Only a named CRS (`crs.type === 'name'` with `crs.properties.name`) is
 * inspected. The name is accepted when it contains the EPSG code 32736 as a
 * standalone number, or spells out UTM zone 36 South ("UTM zone 36S").
 * Names for other zones/hemispheres such as "UTM zone 36N" or EPSG:32636
 * are rejected.
 *
 * @returns {{name: string, status: 'pass'|'fail', message: string, details: object}}
 */
function validateCRS() {
    const crs = geojsonData.crs;
    let detectedCRS = 'Not specified';
    let status = 'fail';
    let message = `CRS not specified. Expected: ${CONFIG.VALID_CRS}`;

    const crsName = (crs && crs.type === 'name' && crs.properties) ? crs.properties.name : undefined;

    if (crsName) {
        // Coerce so a numeric name (e.g. 32736) cannot break the string tests below.
        detectedCRS = String(crsName);

        // "32736" must not be part of a longer number.
        const hasEpsgCode = /(^|\D)32736(\D|$)/.test(detectedCRS);
        // e.g. "WGS 84 / UTM zone 36S", "UTM 36 South"
        const namesZone36South = /\bUTM[\s_-]*(?:zone[\s_-]*)?36[\s_-]*S(?:outh)?\b/i.test(detectedCRS);

        if (hasEpsgCode || namesZone36South) {
            status = 'pass';
            message = `✓ Correct CRS detected: ${detectedCRS}`;
        } else {
            status = 'fail';
            message = `Wrong CRS: ${detectedCRS}. Expected: ${CONFIG.VALID_CRS}`;
        }
    }

    return {
        name: 'Coordinate Reference System',
        status: status,
        message: message,
        details: {
            title: 'CRS Validation',
            type: 'crs',
            expected: CONFIG.VALID_CRS,
            description: CONFIG.CRS_DESCRIPTION,
            detected: detectedCRS,
            crsObject: crs
        }
    };
}
|
||||
|
||||
/**
 * Compare the set of field names in the spreadsheet with the set of field
 * names in the GeoJSON features.
 *
 * Names are compared as trimmed strings. Rows/features without a `field`
 * value are skipped (instead of being counted as a field literally named
 * "undefined"), and a GeoJSON without a `features` array or with features
 * lacking `properties` no longer throws.
 *
 * @returns {{name: string, status: 'pass'|'fail', message: string, details: object}}
 */
function validateFieldMatching() {
    const toNameSet = (values) => {
        const names = new Set();
        values.forEach(value => {
            if (value !== undefined && value !== null) {
                names.add(String(value).trim());
            }
        });
        return names;
    };

    const features = Array.isArray(geojsonData.features) ? geojsonData.features : [];

    const excelFields = toNameSet((excelData || []).map(row => (row ? row.field : undefined)));
    const geojsonFields = toNameSet(features.map(f => (f && f.properties ? f.properties.field : undefined)));

    const matchingFields = Array.from(excelFields).filter(f => geojsonFields.has(f));
    const excelOnly = Array.from(excelFields).filter(f => !geojsonFields.has(f));
    const geojsonOnly = Array.from(geojsonFields).filter(f => !excelFields.has(f));

    let status = 'pass';
    let message = 'All field names match between Excel and GeoJSON';

    if (excelOnly.length > 0 || geojsonOnly.length > 0) {
        status = 'fail';
        message = `Field name mismatches detected: ${excelOnly.length} in Excel only, ${geojsonOnly.length} in GeoJSON only`;
    }

    // One row per distinct field: Excel fields first, then GeoJSON-only fields.
    const matchingTable = [];
    excelFields.forEach(field => {
        const inGeojson = geojsonFields.has(field);
        matchingTable.push({
            field: field,
            excel: true,
            geojson: inGeojson,
            status: inGeojson ? 'match' : 'mismatch'
        });
    });
    geojsonOnly.forEach(field => {
        matchingTable.push({
            field: field,
            excel: false,
            geojson: true,
            status: 'mismatch'
        });
    });

    return {
        name: 'Field Name Matching',
        status: status,
        message: message,
        details: {
            title: 'Field Name Matching',
            type: 'fieldMatching',
            matching: matchingFields,
            excelOnly: excelOnly,
            geojsonOnly: geojsonOnly,
            matchingTable: matchingTable
        }
    };
}
|
||||
|
||||
/**
 * Check the content of individual spreadsheet cells.
 *
 * - `season_start` must be present and parse as a date.
 * - `year` must be a whole number; the complete cell text has to be numeric,
 *   so values such as "2024abc" are rejected.
 * - `tonnage_ha` may be empty (an empty cell arrives as undefined, null or
 *   ''), but when filled in it must be a finite number in its entirety.
 *
 * Reported row numbers are `index + 2`, which presumably maps a zero-based
 * data index to the sheet row below a single header row - TODO confirm.
 *
 * @returns {{name: string, status: 'pass'|'warning', message: string, details: object}}
 */
function validateDataTypes() {
    const missingDates = [];
    const invalidYears = [];
    const invalidNumerics = [];

    const isBlank = (value) =>
        value === undefined || value === null || (typeof value === 'string' && value.trim() === '');
    // Number() rejects trailing garbage ("12abc") that parseFloat would accept.
    const toNumber = (value) => (isBlank(value) ? NaN : Number(value));

    excelData.forEach((row, idx) => {
        const rowNumber = idx + 2;

        // Check season_start
        if (!row.season_start) {
            missingDates.push({ row: rowNumber, field: row.field, column: 'season_start' });
        } else if (!isValidDate(row.season_start)) {
            invalidYears.push({ row: rowNumber, field: row.field, column: 'season_start', value: row.season_start });
        }

        // Check year
        if (!Number.isInteger(toNumber(row.year))) {
            invalidYears.push({ row: rowNumber, field: row.field, column: 'year', value: row.year });
        }

        // Check numeric columns (age is optional, sub_area is text, not numeric)
        ['tonnage_ha'].forEach(col => {
            const val = row[col];
            if (!isBlank(val) && !Number.isFinite(toNumber(val))) {
                invalidNumerics.push({ row: rowNumber, field: row.field, column: col, value: val });
            }
        });
    });

    let status = 'pass';
    let message = 'All data types valid';

    if (missingDates.length > 0 || invalidYears.length > 0 || invalidNumerics.length > 0) {
        status = 'warning';
        message = `Data validation issues found: ${missingDates.length} missing dates, ${invalidYears.length} invalid years/dates, ${invalidNumerics.length} invalid numerics`;
    }

    return {
        name: 'Data Validation',
        status: status,
        message: message,
        details: {
            title: 'Data Type & Content Validation',
            type: 'dataValidation',
            missingDates: missingDates,
            invalidYears: invalidYears,
            invalidNumerics: invalidNumerics
        }
    };
}
|
||||
|
||||
/**
 * Report whether a value can be turned into a valid Date.
 *
 * @param {*} dateString  Value handed to the Date constructor.
 * @returns {boolean} false for falsy input or when the resulting Date is
 *   invalid (its timestamp is NaN); true otherwise.
 */
function isValidDate(dateString) {
    if (!dateString) {
        return false;
    }
    const timestamp = new Date(dateString).getTime();
    return !Number.isNaN(timestamp);
}
|
||||
|
||||
/**
 * Render the validation outcome: one traffic-light row per check followed by
 * the detail section of each check, then reveal the results area.
 *
 * Check names and messages can contain text taken from the uploaded files
 * (column names, CRS names), so they are inserted via textContent rather
 * than innerHTML to keep file content from being interpreted as markup.
 *
 * @param {{checks: Array<{name: string, status: string, message: string}>, details: Array<object>}} results
 */
function displayResults(results) {
    const trafficLight = document.getElementById('trafficLight');
    const detailsSection = document.getElementById('detailsSection');
    const resultsSection = document.getElementById('resultsSection');

    trafficLight.innerHTML = '';
    detailsSection.innerHTML = '';

    // Display traffic lights
    results.checks.forEach(check => {
        const colour = check.status === 'pass' ? 'green' : check.status === 'warning' ? 'yellow' : 'red';

        const light = document.createElement('div');
        light.className = `check-item ${check.status}`;

        const dot = document.createElement('span');
        dot.className = `light ${colour}`;

        const title = document.createElement('strong');
        title.textContent = check.name;

        const text = document.createElement('div');
        text.style.fontSize = '13px';
        text.style.marginTop = '4px';
        text.textContent = check.message;

        const body = document.createElement('div');
        body.appendChild(title);
        body.appendChild(text);

        light.appendChild(dot);
        light.appendChild(body);
        trafficLight.appendChild(light);
    });

    // Display details; unknown detail types are skipped.
    results.details.forEach(detail => {
        switch (detail.type) {
            case 'columns':
                detailsSection.appendChild(createColumnDetails(detail));
                break;
            case 'properties':
                detailsSection.appendChild(createPropertiesDetails(detail));
                break;
            case 'crs':
                detailsSection.appendChild(createCRSDetails(detail));
                break;
            case 'fieldMatching':
                detailsSection.appendChild(createFieldMatchingDetails(detail));
                break;
            case 'dataValidation':
                detailsSection.appendChild(createDataValidationDetails(detail));
                break;
            default:
                break;
        }
    });

    resultsSection.classList.add('show');
}
|
||||
|
||||
/**
 * Build the detail panel for the Excel column check.
 *
 * Column names originate from the uploaded spreadsheet, so every
 * interpolated value is HTML-escaped before being placed in the markup.
 *
 * @param {{title: string, required: string[], optional: string[], missing: string[], extra: string[]}} detail
 * @returns {HTMLElement} A <div> containing the rendered panel.
 */
function createColumnDetails(detail) {
    const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));
    const badges = (columns, style) => columns
        .map(col => `<div class="field-badge" style="${style}">${esc(col)}</div>`)
        .join('');

    const parts = [`<h3>${esc(detail.title)}</h3>`];

    // Required columns
    parts.push(`
        <div style="margin-bottom: 15px;">
            <strong>Required Columns:</strong>
            <div class="field-list" style="margin-top: 8px;">
                ${badges(detail.required, 'border-left-color: #28a745; background: #d4edda; color: #155724;')}
            </div>
        </div>
    `);

    // Optional columns
    if (detail.optional && detail.optional.length > 0) {
        parts.push(`
            <div style="margin-bottom: 15px;">
                <strong>Optional Columns (not required):</strong>
                <div class="field-list" style="margin-top: 8px;">
                    ${badges(detail.optional, 'border-left-color: #17a2b8; background: #d1ecf1; color: #0c5460;')}
                </div>
                <small style="display: block; margin-top: 8px;">✓ <em>${esc(detail.optional.join(', '))} ${detail.optional.length === 1 ? 'is' : 'are'} calculated in the system or optional</em></small>
            </div>
        `);
    }

    if (detail.missing.length > 0) {
        parts.push(`
            <div class="message-box error">
                <strong>❌ Missing Required Columns:</strong><br>${esc(detail.missing.join(', '))}
            </div>
        `);
    }

    if (detail.extra.length > 0) {
        parts.push(`
            <div class="message-box warning">
                <strong>⚠️ Extra Columns (will be ignored):</strong><br>${esc(detail.extra.join(', '))}
            </div>
        `);
    }

    if (detail.missing.length === 0 && detail.extra.length === 0) {
        parts.push(`
            <div class="message-box success">
                <strong>✓ Perfect!</strong> All required columns present.
            </div>
        `);
    }

    const section = document.createElement('div');
    section.innerHTML = parts.join('');
    return section;
}
|
||||
|
||||
/**
 * Build the detail panel for the GeoJSON property check.
 *
 * Property and field names originate from the uploaded GeoJSON, so every
 * interpolated value is HTML-escaped before being placed in the markup.
 *
 * @param {{title: string, error?: string, extra?: string[], missingInFeatures?: Array<{feature: number, field: *, property: string}>}} detail
 * @returns {HTMLElement} A <div> containing the rendered panel.
 */
function createPropertiesDetails(detail) {
    const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));

    const section = document.createElement('div');
    const parts = [`<h3>${esc(detail.title)}</h3>`];

    if (detail.error) {
        parts.push(`<div class="message-box error">${esc(detail.error)}</div>`);
        section.innerHTML = parts.join('');
        return section;
    }

    const hasMissing = Boolean(detail.missingInFeatures && detail.missingInFeatures.length > 0);
    const hasExtra = Boolean(detail.extra && detail.extra.length > 0);

    if (hasMissing) {
        const rows = detail.missingInFeatures
            .map(m => `<tr><td>${esc(m.feature)}</td><td>${esc(m.field)}</td><td>${esc(m.property)}</td></tr>`)
            .join('');
        parts.push(`
            <div class="message-box error">
                <strong>❌ Missing Properties in Features:</strong>
                <table>
                    <tr><th>Feature #</th><th>Field Name</th><th>Missing Property</th></tr>
                    ${rows}
                </table>
            </div>
        `);
    }

    if (hasExtra) {
        parts.push(`
            <div class="message-box warning">
                <strong>⚠️ Extra Properties (redundant):</strong><br>${esc(detail.extra.join(', '))}<br>
                <small>These will be ignored during processing.</small>
            </div>
        `);
    }

    if (!hasMissing && !hasExtra) {
        // The feature count is read from the loaded GeoJSON; the detail object does not carry it.
        parts.push(`
            <div class="message-box success">
                <strong>✓ Perfect!</strong> All required properties present in all ${geojsonData.features.length} features.
            </div>
        `);
    }

    section.innerHTML = parts.join('');
    return section;
}
|
||||
|
||||
/**
 * Build the detail panel for the CRS check.
 *
 * The detected CRS name and the raw CRS object come from the uploaded
 * GeoJSON, so both are HTML-escaped before being placed in the markup.
 * A CRS is shown as correct when its name contains the EPSG code 32736 as a
 * standalone number or spells out UTM zone 36 South; other UTM 36 variants
 * (e.g. "UTM zone 36N") are shown as wrong.
 *
 * @param {{title: string, expected: string, description: string, detected: string, crsObject: *}} detail
 * @returns {HTMLElement} A <div> containing the rendered panel.
 */
function createCRSDetails(detail) {
    const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));

    const detected = String(detail.detected);
    const isExpectedCRS =
        /(^|\D)32736(\D|$)/.test(detected) ||
        /\bUTM[\s_-]*(?:zone[\s_-]*)?36[\s_-]*S(?:outh)?\b/i.test(detected);

    const parts = [`<h3>${esc(detail.title)}</h3>`];

    if (detected === 'Not specified') {
        parts.push(`
            <div class="message-box error">
                <strong>❌ CRS Not Specified</strong><br>
                Expected: <code>${esc(detail.expected)}</code><br>
                ${esc(detail.description)}
            </div>
        `);
    } else if (isExpectedCRS) {
        parts.push(`
            <div class="message-box success">
                <strong>✓ Correct CRS</strong><br>
                Detected: <code>${esc(detected)}</code><br>
                ${esc(detail.description)}
            </div>
        `);
    } else {
        parts.push(`
            <div class="message-box error">
                <strong>❌ Wrong CRS</strong><br>
                Expected: <code>${esc(detail.expected)}</code><br>
                Detected: <code>${esc(detected)}</code><br>
                ${esc(detail.description)}
            </div>
        `);
    }

    if (detail.crsObject) {
        parts.push(`
            <div style="margin-top: 15px; padding: 10px; background: #f8f9ff; border-radius: 4px; font-size: 12px;">
                <strong>CRS Details:</strong><br>
                <code>${esc(JSON.stringify(detail.crsObject, null, 2))}</code>
            </div>
        `);
    }

    const section = document.createElement('div');
    section.innerHTML = parts.join('');
    return section;
}
|
||||
|
||||
/**
 * Build the detail panel for the field-name matching check: lists of fields
 * present on only one side, the matching fields, and a complete summary
 * table.
 *
 * Field names originate from the uploaded files, so every interpolated name
 * is HTML-escaped before being placed in the markup.
 *
 * @param {{title: string, matching: string[], excelOnly: string[], geojsonOnly: string[], matchingTable: Array<{field: string, excel: boolean, geojson: boolean, status: string}>}} detail
 * @returns {HTMLElement} A <div> containing the rendered panel.
 */
function createFieldMatchingDetails(detail) {
    const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));

    const parts = [`<h3>${esc(detail.title)}</h3>`];

    if (detail.excelOnly.length > 0) {
        parts.push(`
            <div class="message-box error">
                <strong>❌ Fields in Excel but NOT in GeoJSON (${detail.excelOnly.length}):</strong>
                <div class="field-list">
                    ${detail.excelOnly.map(f => `<div class="field-badge missing">${esc(f)}</div>`).join('')}
                </div>
                <small style="display: block; margin-top: 10px;">These fields exist in your harvest data but have no boundaries defined in the GeoJSON.</small>
            </div>
        `);
    }

    if (detail.geojsonOnly.length > 0) {
        parts.push(`
            <div class="message-box error">
                <strong>❌ Fields in GeoJSON but NOT in Excel (${detail.geojsonOnly.length}):</strong>
                <div class="field-list">
                    ${detail.geojsonOnly.map(f => `<div class="field-badge extra">${esc(f)}</div>`).join('')}
                </div>
                <small style="display: block; margin-top: 10px;">These fields have boundaries defined but no data in your harvest spreadsheet.</small>
            </div>
        `);
    }

    if (detail.matching.length > 0) {
        parts.push(`
            <div class="message-box success">
                <strong>✓ Matching Fields (${detail.matching.length}):</strong>
                <div class="field-list">
                    ${detail.matching.map(f => `<div class="field-badge" style="border-left-color: #28a745; background: #d4edda; color: #155724;">${esc(f)}</div>`).join('')}
                </div>
            </div>
        `);
    }

    // Full matching table
    const tableRows = detail.matchingTable.map(row => `
        <tr>
            <td><strong>${esc(row.field)}</strong></td>
            <td>${row.excel ? '✓' : '✗'}</td>
            <td>${row.geojson ? '✓' : '✗'}</td>
            <td><span class="${esc(row.status)}">${row.status === 'match' ? '🟢 Match' : '🔴 Mismatch'}</span></td>
        </tr>
    `).join('');

    parts.push(`
        <div style="margin-top: 20px;">
            <strong>Complete Field Summary:</strong>
            <table>
                <tr>
                    <th>Field Name</th>
                    <th>In Excel</th>
                    <th>In GeoJSON</th>
                    <th>Status</th>
                </tr>
                ${tableRows}
            </table>
        </div>
    `);

    const section = document.createElement('div');
    section.innerHTML = parts.join('');
    return section;
}
|
||||
|
||||
/**
 * Build the detail panel for the cell-content check: tables of missing
 * dates, invalid dates/years and invalid numeric values, or a success box
 * when all three lists are empty.
 *
 * Field names and cell values originate from the uploaded spreadsheet, so
 * every interpolated value is HTML-escaped before being placed in the markup.
 *
 * @param {{title: string, missingDates: object[], invalidYears: object[], invalidNumerics: object[]}} detail
 * @returns {HTMLElement} A <div> containing the rendered panel.
 */
function createDataValidationDetails(detail) {
    const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));
    // Shared renderer for the two four-column tables.
    const valueRows = (entries) => entries
        .map(m => `<tr><td>${esc(m.row)}</td><td>${esc(m.field)}</td><td>${esc(m.column)}</td><td>${esc(m.value)}</td></tr>`)
        .join('');

    const parts = [`<h3>${esc(detail.title)}</h3>`];

    if (detail.missingDates.length > 0) {
        parts.push(`
            <div class="message-box warning">
                <strong>⚠️ Missing season_start dates (${detail.missingDates.length}):</strong>
                <table style="font-size: 13px;">
                    <tr><th>Row #</th><th>Field Name</th></tr>
                    ${detail.missingDates.map(m => `<tr><td>${esc(m.row)}</td><td>${esc(m.field)}</td></tr>`).join('')}
                </table>
            </div>
        `);
    }

    if (detail.invalidYears.length > 0) {
        parts.push(`
            <div class="message-box warning">
                <strong>⚠️ Invalid dates/years (${detail.invalidYears.length}):</strong>
                <table style="font-size: 13px;">
                    <tr><th>Row #</th><th>Field Name</th><th>Column</th><th>Value</th></tr>
                    ${valueRows(detail.invalidYears)}
                </table>
            </div>
        `);
    }

    if (detail.invalidNumerics.length > 0) {
        parts.push(`
            <div class="message-box warning">
                <strong>⚠️ Invalid numeric values (${detail.invalidNumerics.length}):</strong>
                <table style="font-size: 13px;">
                    <tr><th>Row #</th><th>Field Name</th><th>Column</th><th>Value</th></tr>
                    ${valueRows(detail.invalidNumerics)}
                </table>
            </div>
        `);
    }

    if (detail.missingDates.length === 0 && detail.invalidYears.length === 0 && detail.invalidNumerics.length === 0) {
        parts.push(`
            <div class="message-box success">
                <strong>✓ All data types valid!</strong> No missing dates or invalid values detected.
            </div>
        `);
    }

    const section = document.createElement('div');
    section.innerHTML = parts.join('');
    return section;
}
|
||||
|
||||
/**
 * Show a blocking alert describing a problem with one of the input files.
 *
 * @param {string} fileType  Label of the affected file, placed before "Error:".
 * @param {string} message   Description of the problem.
 */
function showError(fileType, message) {
    const alertText = `${fileType} Error: ${message}`;
    alert(alertText);
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
library(terra)
|
||||
library(sf)
|
||||
|
||||
# Debug helper: inspect one weekly mosaic and test extracting its band 5
# values for the first field polygon.

# Check the mosaic
mosaic_path <- "laravel_app/storage/app/angata/weekly_mosaic/week_52_2025.tif"
mosaic <- terra::rast(mosaic_path)
cat("Mosaic info:\n")
cat("  Layers:", terra::nlyr(mosaic), "\n")
mosaic_ext <- terra::ext(mosaic)
ext_vals <- c(mosaic_ext$xmin, mosaic_ext$xmax, mosaic_ext$ymin, mosaic_ext$ymax)
cat("  Extent:", paste(round(ext_vals, 2), collapse = ", "), "\n")

# Extract band 5 (fail with a clear message when the mosaic has fewer layers)
if (terra::nlyr(mosaic) < 5) {
  stop("Mosaic has only ", terra::nlyr(mosaic), " layer(s); band 5 is not available",
       call. = FALSE)
}
band5 <- mosaic[[5]]
cat("Band 5 (CI):\n")
min_val <- terra::global(band5, "min", na.rm = TRUE)[[1]]
max_val <- terra::global(band5, "max", na.rm = TRUE)[[1]]
cat("  Min:", round(min_val, 3), "\n")
cat("  Max:", round(max_val, 3), "\n")

# Check field boundaries
geojson_path <- "laravel_app/storage/app/angata/Data/pivot.geojson"
fields <- sf::st_read(geojson_path, quiet = TRUE)
if (nrow(fields) == 0) {
  stop("No features found in ", geojson_path, call. = FALSE)
}
cat("\nTesting extraction on first field:\n")

# Get first field
field_1 <- fields[1, ]
field_id <- field_1$field
cat("  Field ID:", field_id, "\n")

# Try extraction
tryCatch({
  # terra::vect() converts sf objects directly; no detour through sp is needed.
  field_geom <- terra::vect(field_1)
  cat("  Geometry CRS:", terra::crs(field_geom), "\n")
  cat("  Raster CRS:", terra::crs(band5), "\n")
  # A CRS mismatch is a common reason for extractions that return only NA.
  cat("  CRS match:", terra::same.crs(field_geom, band5), "\n")

  result <- terra::extract(band5, field_geom)
  cat("  Extract result rows:", nrow(result), "\n")
  cat("  Extract result cols:", ncol(result), "\n")

  if (nrow(result) > 0) {
    # Column 1 is the polygon ID; column 2 holds the raster values.
    vals <- result[, 2]
    n_valid <- sum(!is.na(vals))
    cat("  Values extracted:", length(vals), "\n")
    cat("  Non-NA values:", n_valid, "\n")
    if (n_valid > 0) {
      cat("  Range of non-NA values:", min(vals, na.rm = TRUE), "to",
          max(vals, na.rm = TRUE), "\n")
    }
  }
}, error = function(e) {
  cat("  ERROR:", conditionMessage(e), "\n")
})
|
||||
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
# Quick script to examine KPI results
#
# Prints the field-level details, the forecasted TCH results and the TCH
# summary table stored for one project and report week. Change `project` and
# `week` to inspect another run.
project <- "esa"
week <- 39L

kpi_dir <- file.path("laravel_app", "storage", "app", project, "reports", "kpis")

field_details <- readRDS(
  file.path(kpi_dir, sprintf("%s_field_details_week%d.rds", project, week))
)
summary_tables <- readRDS(
  file.path(kpi_dir, sprintf("%s_kpi_summary_tables_week%d.rds", project, week))
)

cat("=== FIELD DETAILS ===\n")
print(head(field_details, 20))
cat("\nTotal rows:", nrow(field_details), "\n\n")

cat("=== TCH FORECASTED FIELD RESULTS ===\n")
tch_results <- readRDS(
  file.path(
    kpi_dir, "field_level",
    sprintf("tch_forecasted_field_results_week%d.rds", week)
  )
)
print(tch_results)
cat("\nNumber of predictions:", nrow(tch_results), "\n\n")

cat("=== SUMMARY TABLES ===\n")
print(summary_tables$tch_forecasted)
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
# Generate Interactive SAR Report
# ===============================
#
# Renders r_app/SAR_exploration_report.Rmd to HTML. Must be started from the
# main "smartcane" directory because all paths below are relative to it.

cat("Generating interactive SAR exploration report...\n")

# Install rmarkdown if needed. requireNamespace() only checks availability;
# the package is called through rmarkdown:: below, so it need not be attached.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}

# Check working directory
if (basename(getwd()) != "smartcane") {
  stop("Please run this from the main smartcane directory", call. = FALSE)
}

# Render the report
report_file <- "r_app/SAR_exploration_report.Rmd"
output_file <- "output/SAR_exploration_report.html"

cat("Rendering report:", report_file, "\n")
cat("Output file:", output_file, "\n")

# Render with error handling
tryCatch({
  rmarkdown::render(
    input = report_file,
    output_file = output_file,
    output_format = "html_document",
    quiet = FALSE
  )

  cat("\n✓ Report generated successfully!\n")
  cat("Open", output_file, "in your browser to view the interactive maps.\n")

  # Try to open in browser (Windows); skipped when the file is not at the
  # expected location, so a missing file cannot turn success into an error.
  if (.Platform$OS.type == "windows" && file.exists(output_file)) {
    shell.exec(normalizePath(output_file))
  }
}, error = function(e) {
  cat("✗ Error generating report:\n")
  cat(conditionMessage(e), "\n")

  # No fallback rendering is implemented; point the user at the log instead.
  cat("Check the console output above for specific errors.\n")
})
|
||||
32983
get-pip.py
32983
get-pip.py
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
Before Width: | Height: | Size: 24 KiB |
|
|
@ -1,27 +0,0 @@
|
|||
# Quick script to inspect the actual band structure of 8-band imagery
# Prints name, sample values and min/max/mean for every layer of one
# sample raster, then lists the unique values of band 9 when it exists.

library(terra)

sample_tif <- "laravel_app/storage/app/esa/merged_tif_8b/2025-01-15.tif"
r <- rast(sample_tif)
n_bands <- nlyr(r)

cat("Number of bands:", n_bands, "\n\n")

# Check each band's values
for (i in seq_len(n_bands)) {
  vals <- values(r[[i]], mat = FALSE)
  # head() never pads with NA when fewer than 100 valid pixels exist
  vals_sample <- head(vals[!is.na(vals)], 100)

  cat("Band", i, ":\n")
  cat(" Name:", names(r)[i], "\n")
  cat(" Sample values:", paste(head(vals_sample, 10), collapse = ", "), "\n")

  if (length(vals_sample) == 0) {
    # min()/max() on an all-NA band would warn and print Inf/-Inf
    cat(" (band contains no non-NA values)\n\n")
    next
  }

  cat(" Min:", min(vals, na.rm = TRUE), "\n")
  cat(" Max:", max(vals, na.rm = TRUE), "\n")
  cat(" Mean:", mean(vals, na.rm = TRUE), "\n\n")
}

# Check if band 9 is actually a mask or quality band
if (n_bands >= 9) {
  cat("\nBand 9 unique values (first 50):\n")
  band9_vals <- values(r[[9]], mat = FALSE)
  print(head(unique(band9_vals[!is.na(band9_vals)]), 50))
} else {
  # Indexing a missing layer would abort the script with an error
  cat("\nNo band 9 present (file has", n_bands, "bands)\n")
}
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
# Quick script to inspect band structure of merged_tif_8b files
# Prints the name, value range, mean, valid-pixel count and a few sample
# values for every layer of one raster file.
library(terra)
library(here)

# Pick one file to inspect
test_file <- here("laravel_app/storage/app/esa/merged_tif_8b/2025-11-15.tif")

cat("=== INSPECTING BAND STRUCTURE ===\n\n")
cat(sprintf("File: %s\n\n", basename(test_file)))

# Load raster
rast_obj <- rast(test_file)

cat(sprintf("Number of bands: %d\n\n", nlyr(rast_obj)))

# Check each band
for (i in seq_len(nlyr(rast_obj))) {
  band <- rast_obj[[i]]
  band_vals <- values(band, mat = FALSE)
  band_vals <- band_vals[!is.na(band_vals)]

  cat(sprintf("Band %d:\n", i))
  cat(sprintf(" Name: %s\n", names(band)))

  if (length(band_vals) == 0) {
    # min()/max() on an empty vector warn and yield Inf/-Inf
    cat(sprintf(" Non-NA pixels: %d\n\n", length(band_vals)))
    next
  }

  cat(sprintf(" Values range: %.2f to %.2f\n", min(band_vals, na.rm = TRUE), max(band_vals, na.rm = TRUE)))
  cat(sprintf(" Mean: %.2f\n", mean(band_vals, na.rm = TRUE)))
  cat(sprintf(" Non-NA pixels: %d\n", length(band_vals)))
  cat(sprintf(" Sample values: %s\n\n", paste(head(band_vals, 10), collapse = ", ")))
}
|
||||
314
kpi_debug.out
314
kpi_debug.out
|
|
@ -1,314 +0,0 @@
|
|||
|
||||
R version 4.4.3 (2025-02-28 ucrt) -- "Trophy Case"
|
||||
Copyright (C) 2025 The R Foundation for Statistical Computing
|
||||
Platform: x86_64-w64-mingw32/x64
|
||||
|
||||
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
||||
You are welcome to redistribute it under certain conditions.
|
||||
Type 'license()' or 'licence()' for distribution details.
|
||||
|
||||
Natural language support but running in an English locale
|
||||
|
||||
R is a collaborative project with many contributors.
|
||||
Type 'contributors()' for more information and
|
||||
'citation()' on how to cite R or R packages in publications.
|
||||
|
||||
Type 'demo()' for some demos, 'help()' for on-line help, or
|
||||
'help.start()' for an HTML browser interface to help.
|
||||
Type 'q()' to quit R.
|
||||
|
||||
- Project 'C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane_v2/smartcane' loaded. [renv 1.1.4]
|
||||
> # 09_CALCULATE_KPIS.R
|
||||
> # ===================
|
||||
> # This script calculates 6 Key Performance Indicators (KPIs) for sugarcane monitoring:
|
||||
> # 1. Field Uniformity Summary
|
||||
> # 2. Farm-wide Area Change Summary
|
||||
> # 3. TCH Forecasted
|
||||
> # 4. Growth Decline Index
|
||||
> # 5. Weed Presence Score
|
||||
> # 6. Gap Filling Score (placeholder)
|
||||
> #
|
||||
> # Usage: Rscript 09_calculate_kpis.R [end_date] [offset] [project_dir]
|
||||
> # - end_date: End date for KPI calculation (YYYY-MM-DD format), default: today
|
||||
> # - offset: Number of days to look back (not currently used for KPIs, but for consistency)
|
||||
> # - project_dir: Project directory name (e.g., "aura", "esa")
|
||||
>
|
||||
> # 1. Load required libraries
|
||||
> # -------------------------
|
||||
> suppressPackageStartupMessages({
|
||||
+ library(here)
|
||||
+ library(sf)
|
||||
+ library(terra)
|
||||
+ library(dplyr)
|
||||
+ library(tidyr)
|
||||
+ library(lubridate)
|
||||
+ library(readr)
|
||||
+ library(caret)
|
||||
+ library(CAST)
|
||||
+ library(randomForest)
|
||||
+ })
|
||||
>
|
||||
> # 2. Main function
|
||||
> # --------------
|
||||
> main <- function() {
|
||||
+ # Process command line arguments
|
||||
+ args <- commandArgs(trailingOnly = TRUE)
|
||||
+
|
||||
+ # Process end_date argument
|
||||
+ if (length(args) >= 1 && !is.na(args[1])) {
|
||||
+ end_date <- as.Date(args[1])
|
||||
+ if (is.na(end_date)) {
|
||||
+ warning("Invalid end_date provided. Using default (current date).")
|
||||
+ end_date <- Sys.Date()
|
||||
+ }
|
||||
+ } else {
|
||||
+ end_date <- Sys.Date()
|
||||
+ }
|
||||
+
|
||||
+ # Process offset argument (for consistency with other scripts, not currently used)
|
||||
+ if (length(args) >= 2 && !is.na(args[2])) {
|
||||
+ offset <- as.numeric(args[2])
|
||||
+ if (is.na(offset) || offset <= 0) {
|
||||
+ warning("Invalid offset provided. Using default (7 days).")
|
||||
+ offset <- 7
|
||||
+ }
|
||||
+ } else {
|
||||
+ offset <- 7
|
||||
+ }
|
||||
+
|
||||
+ # Process project_dir argument
|
||||
+ if (length(args) >= 3 && !is.na(args[3])) {
|
||||
+ project_dir <- as.character(args[3])
|
||||
+ } else {
|
||||
+ project_dir <- "esa" # Default project
|
||||
+ }
|
||||
+
|
||||
+ # Make project_dir available globally so parameters_project.R can use it
|
||||
+ assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
+
|
||||
+ # 3. Load utility functions and project configuration
|
||||
+ # --------------------------------------------------
|
||||
+
|
||||
+ tryCatch({
|
||||
+ source(here("r_app", "crop_messaging_utils.R"))
|
||||
+ }, error = function(e) {
|
||||
+ stop("Error loading crop_messaging_utils.R: ", e$message)
|
||||
+ })
|
||||
+
|
||||
+ tryCatch({
|
||||
+ source(here("r_app", "kpi_utils.R"))
|
||||
+ }, error = function(e) {
|
||||
+ stop("Error loading kpi_utils.R: ", e$message)
|
||||
+ })
|
||||
+
|
||||
+ # Load project parameters (this sets up all directory paths and field boundaries)
|
||||
+ tryCatch({
|
||||
+ source(here("r_app", "parameters_project.R"))
|
||||
+ }, error = function(e) {
|
||||
+ stop("Error loading parameters_project.R: ", e$message)
|
||||
+ })
|
||||
+
|
||||
+ # Load growth model utils if available (for yield prediction)
|
||||
+ tryCatch({
|
||||
+ source(here("r_app", "growth_model_utils.R"))
|
||||
+ }, error = function(e) {
|
||||
+ warning("growth_model_utils.R not found, yield prediction KPI will use placeholder data")
|
||||
+ })
|
||||
+
|
||||
+ # Check if required variables exist
|
||||
+ if (!exists("project_dir")) {
|
||||
+ stop("project_dir must be set before running this script")
|
||||
+ }
|
||||
+
|
||||
+ if (!exists("field_boundaries_sf") || is.null(field_boundaries_sf)) {
|
||||
+ stop("Field boundaries not loaded. Check parameters_project.R initialization.")
|
||||
+ }
|
||||
+
|
||||
+ # 4. Calculate all KPIs
|
||||
+ # -------------------
|
||||
+ output_dir <- file.path(reports_dir, "kpis")
|
||||
+
|
||||
+ kpi_results <- calculate_all_kpis(
|
||||
+ report_date = end_date,
|
||||
+ output_dir = output_dir,
|
||||
+ field_boundaries_sf = field_boundaries_sf,
|
||||
+ harvesting_data = harvesting_data,
|
||||
+ cumulative_CI_vals_dir = cumulative_CI_vals_dir,
|
||||
+ weekly_CI_mosaic = weekly_CI_mosaic,
|
||||
+ reports_dir = reports_dir,
|
||||
+ project_dir = project_dir
|
||||
+ )
|
||||
+
|
||||
+ # 5. Print summary
|
||||
+ # --------------
|
||||
+ cat("\n=== KPI CALCULATION SUMMARY ===\n")
|
||||
+ cat("Report Date:", as.character(kpi_results$metadata$report_date), "\n")
|
||||
+ cat("Current Week:", kpi_results$metadata$current_week, "\n")
|
||||
+ cat("Previous Week:", kpi_results$metadata$previous_week, "\n")
|
||||
+ cat("Total Fields Analyzed:", kpi_results$metadata$total_fields, "\n")
|
||||
+ cat("Calculation Time:", as.character(kpi_results$metadata$calculation_time), "\n")
|
||||
+
|
||||
+ cat("\nField Uniformity Summary:\n")
|
||||
+ print(kpi_results$field_uniformity_summary)
|
||||
+
|
||||
+ cat("\nArea Change Summary:\n")
|
||||
+ print(kpi_results$area_change)
|
||||
+
|
||||
+ cat("\nTCH Forecasted:\n")
|
||||
+ print(kpi_results$tch_forecasted)
|
||||
+
|
||||
+ cat("\nGrowth Decline Index:\n")
|
||||
+ print(kpi_results$growth_decline)
|
||||
+
|
||||
+ cat("\nWeed Presence Score:\n")
|
||||
+ print(kpi_results$weed_presence)
|
||||
+
|
||||
+ cat("\nGap Filling Score:\n")
|
||||
+ print(kpi_results$gap_filling)
|
||||
+
|
||||
+ cat("\n=== KPI CALCULATION COMPLETED ===\n")
|
||||
+ }
|
||||
>
|
||||
> # 6. Script execution
|
||||
> # -----------------
|
||||
> if (sys.nframe() == 0) {
|
||||
+ main()
|
||||
+ }
|
||||
[INFO] 2025-10-08 15:39:29 - Initializing project with directory: esa
|
||||
[1] "model using cumulative_CI,DOY will be trained now..."
|
||||
note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
|
||||
|
||||
+ Fold1: mtry=2
|
||||
- Fold1: mtry=2
|
||||
+ Fold2: mtry=2
|
||||
- Fold2: mtry=2
|
||||
+ Fold3: mtry=2
|
||||
- Fold3: mtry=2
|
||||
+ Fold4: mtry=2
|
||||
- Fold4: mtry=2
|
||||
+ Fold5: mtry=2
|
||||
- Fold5: mtry=2
|
||||
Aggregating results
|
||||
Fitting final model on full training set
|
||||
[1] "maximum number of models that still need to be trained: 3"
|
||||
[1] "model using cumulative_CI,CI_per_day will be trained now..."
|
||||
note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
|
||||
|
||||
+ Fold1: mtry=2
|
||||
- Fold1: mtry=2
|
||||
+ Fold2: mtry=2
|
||||
- Fold2: mtry=2
|
||||
+ Fold3: mtry=2
|
||||
- Fold3: mtry=2
|
||||
+ Fold4: mtry=2
|
||||
- Fold4: mtry=2
|
||||
+ Fold5: mtry=2
|
||||
- Fold5: mtry=2
|
||||
Aggregating results
|
||||
Fitting final model on full training set
|
||||
[1] "maximum number of models that still need to be trained: 2"
|
||||
[1] "model using DOY,CI_per_day will be trained now..."
|
||||
note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
|
||||
|
||||
+ Fold1: mtry=2
|
||||
- Fold1: mtry=2
|
||||
+ Fold2: mtry=2
|
||||
- Fold2: mtry=2
|
||||
+ Fold3: mtry=2
|
||||
- Fold3: mtry=2
|
||||
+ Fold4: mtry=2
|
||||
- Fold4: mtry=2
|
||||
+ Fold5: mtry=2
|
||||
- Fold5: mtry=2
|
||||
Aggregating results
|
||||
Fitting final model on full training set
|
||||
[1] "maximum number of models that still need to be trained: 1"
|
||||
[1] "vars selected: cumulative_CI,DOY with RMSE 24.808"
|
||||
[1] "model using additional variable CI_per_day will be trained now..."
|
||||
note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .
|
||||
|
||||
+ Fold1: mtry=2
|
||||
- Fold1: mtry=2
|
||||
+ Fold1: mtry=3
|
||||
- Fold1: mtry=3
|
||||
+ Fold2: mtry=2
|
||||
- Fold2: mtry=2
|
||||
+ Fold2: mtry=3
|
||||
- Fold2: mtry=3
|
||||
+ Fold3: mtry=2
|
||||
- Fold3: mtry=2
|
||||
+ Fold3: mtry=3
|
||||
- Fold3: mtry=3
|
||||
+ Fold4: mtry=2
|
||||
- Fold4: mtry=2
|
||||
+ Fold4: mtry=3
|
||||
- Fold4: mtry=3
|
||||
+ Fold5: mtry=2
|
||||
- Fold5: mtry=2
|
||||
+ Fold5: mtry=3
|
||||
- Fold5: mtry=3
|
||||
Aggregating results
|
||||
Selecting tuning parameters
|
||||
Fitting mtry = 3 on full training set
|
||||
[1] "maximum number of models that still need to be trained: 0"
|
||||
[1] "vars selected: cumulative_CI,DOY with RMSE 24.808"
|
||||
field_groups count value
|
||||
75% Top 25% 3 96.2
|
||||
50% Average 7 93.0
|
||||
25% Lowest 25% 2 84.0
|
||||
Total area forecasted 12 219.0
|
||||
|
||||
=== KPI CALCULATION SUMMARY ===
|
||||
Report Date: 2025-10-08
|
||||
Current Week: 40
|
||||
Previous Week: 39
|
||||
Total Fields Analyzed: 12
|
||||
Calculation Time: 2025-10-08 15:39:34.583434
|
||||
|
||||
Field Uniformity Summary:
|
||||
uniformity_level count percent
|
||||
1 Excellent 0 0
|
||||
2 Good 0 0
|
||||
3 Moderate 0 0
|
||||
4 Poor 0 0
|
||||
|
||||
Area Change Summary:
|
||||
change_type hectares percent
|
||||
1 Improving areas 0 0
|
||||
2 Stable areas 0 0
|
||||
3 Declining areas 0 0
|
||||
4 Total area 0 100
|
||||
|
||||
TCH Forecasted:
|
||||
field_groups count value
|
||||
75% Top 25% 3 96.2
|
||||
50% Average 7 93.0
|
||||
25% Lowest 25% 2 84.0
|
||||
Total area forecasted 12 219.0
|
||||
|
||||
Growth Decline Index:
|
||||
risk_level count percent
|
||||
1 High 0 0
|
||||
2 Low 0 0
|
||||
3 Moderate 0 0
|
||||
4 Very-high 0 0
|
||||
|
||||
Weed Presence Score:
|
||||
weed_risk_level field_count percent
|
||||
1 Canopy closed - Low weed risk 4 33.3
|
||||
2 High 0 0.0
|
||||
3 Low 0 0.0
|
||||
4 Moderate 0 0.0
|
||||
|
||||
Gap Filling Score:
|
||||
# A tibble: 1 × 3
|
||||
gap_level field_count percent
|
||||
<chr> <int> <dbl>
|
||||
1 <NA> 12 100
|
||||
|
||||
=== KPI CALCULATION COMPLETED ===
|
||||
There were 50 or more warnings (use warnings() to see the first 50)
|
||||
>
|
||||
> proc.time()
|
||||
user system elapsed
|
||||
11.93 0.93 13.45
|
||||
Binary file not shown.
|
|
@ -1,447 +0,0 @@
|
|||
# ============================================================================
|
||||
# OPERATIONAL HARVEST PREDICTION
|
||||
# Analyze current season growth curves to predict harvest timing
|
||||
# ============================================================================
|
||||
|
||||
suppressPackageStartupMessages({
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(lubridate)
|
||||
library(terra)
|
||||
library(sf)
|
||||
library(here)
|
||||
library(ggplot2)
|
||||
})
|
||||
|
||||
# Set project directory
|
||||
project_dir <- "esa"
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
source(here("r_app", "parameters_project.R"))
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1: LOAD DATA
|
||||
# ============================================================================
|
||||
|
||||
cat("=== LOADING DATA ===\n\n")
|
||||
|
||||
# Load CI time series
|
||||
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
||||
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
||||
|
||||
time_series_daily <- ci_data_raw %>%
|
||||
mutate(
|
||||
date = as.Date(Date),
|
||||
week = isoweek(date),
|
||||
year = isoyear(date)
|
||||
) %>%
|
||||
select(
|
||||
field_id = field,
|
||||
date,
|
||||
week,
|
||||
year,
|
||||
mean_ci = FitData
|
||||
) %>%
|
||||
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
||||
arrange(field_id, date)
|
||||
|
||||
# Load harvest data
# The workbook is resolved through here() and project_dir, the same way as
# the CI time series file, so both inputs always belong to one project and
# the script does not depend on the current working directory.
harvest_file <- here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")

harvest_data <- read_excel(harvest_file) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  # Rows without a harvest date cannot delimit a season
  filter(!is.na(season_end))
|
||||
|
||||
fields_with_ci <- unique(time_series_daily$field_id)
|
||||
harvest_data_filtered <- harvest_data %>%
|
||||
filter(field %in% fields_with_ci) %>%
|
||||
arrange(field, season_end)
|
||||
|
||||
cat("Loaded CI data for", length(fields_with_ci), "fields\n")
|
||||
cat("Loaded harvest data for", length(unique(harvest_data_filtered$field)), "fields\n\n")
|
||||
|
||||
# ============================================================================
|
||||
# STEP 2: SEGMENT TIME SERIES BY SEASON
|
||||
# ============================================================================
|
||||
|
||||
cat("=== SEGMENTING TIME SERIES INTO INDIVIDUAL SEASONS ===\n\n")
|
||||
|
||||
# For each field, create seasons based on harvest dates
# Season starts day after previous harvest, ends at next harvest
#
# Arguments:
#   field_name  Field identifier to segment.
#   ci_ts       CI time series; the columns `field_id` and `date` are used.
#   harvest_df  Harvest records; the columns `field` and `season_end` are used.
#
# Returns the field's CI rows with the added columns season_id,
# season_start_date, season_end_date, days_in_season, days_since_start and
# days_until_harvest, or NULL when the field has no harvest records or no CI
# rows fall inside any season. Rows after the last harvest form the ongoing
# season, whose end date, duration and days-until-harvest are NA.
create_seasons <- function(field_name, ci_ts, harvest_df) {
  # Get CI data for this field
  field_ci <- ci_ts %>%
    filter(field_id == field_name) %>%
    arrange(date)

  # Get harvest dates for this field, oldest first
  field_harvests <- harvest_df %>%
    filter(field == field_name) %>%
    arrange(season_end)

  n_harvests <- nrow(field_harvests)

  # Without harvests there is nothing to segment; without CI rows min() on
  # the dates would warn and no season could contain data anyway.
  if (n_harvests == 0 || nrow(field_ci) == 0) {
    return(NULL)
  }

  # Create season segments
  seasons_list <- list()

  for (i in seq_len(n_harvests)) {
    # Season start: day after previous harvest (or start of data if first season)
    season_start <- if (i == 1) {
      min(field_ci$date)
    } else {
      field_harvests$season_end[i - 1] + 1
    }

    # Season end: current harvest date
    season_end <- field_harvests$season_end[i]

    # Extract CI data for this season
    season_ci <- field_ci %>%
      filter(date >= season_start, date <= season_end)

    if (nrow(season_ci) > 0) {
      season_ci$season_id <- i
      season_ci$season_start_date <- season_start
      season_ci$season_end_date <- season_end
      season_ci$days_in_season <- as.numeric(season_end - season_start)
      season_ci$days_since_start <- as.numeric(season_ci$date - season_start)
      season_ci$days_until_harvest <- as.numeric(season_end - season_ci$date)

      seasons_list[[length(seasons_list) + 1]] <- season_ci
    }
  }

  # Add current ongoing season (after last harvest)
  current_season_start <- field_harvests$season_end[n_harvests] + 1

  current_season_ci <- field_ci %>%
    filter(date >= current_season_start)

  if (nrow(current_season_ci) > 0) {
    current_season_ci$season_id <- n_harvests + 1L
    current_season_ci$season_start_date <- current_season_start
    # Unknown - this is what we're predicting. Typed NAs keep these columns
    # Date/numeric even when a field only has an ongoing season.
    current_season_ci$season_end_date <- as.Date(NA)
    current_season_ci$days_in_season <- NA_real_
    current_season_ci$days_since_start <- as.numeric(current_season_ci$date - current_season_start)
    current_season_ci$days_until_harvest <- NA_real_

    seasons_list[[length(seasons_list) + 1]] <- current_season_ci
  }

  if (length(seasons_list) == 0) {
    return(NULL)
  }

  bind_rows(seasons_list)
}
|
||||
|
||||
# Create segmented data for all fields
|
||||
all_seasons <- lapply(fields_with_ci, function(field_name) {
|
||||
seasons <- create_seasons(field_name, time_series_daily, harvest_data_filtered)
|
||||
if (!is.null(seasons)) {
|
||||
seasons$field_id <- field_name
|
||||
}
|
||||
return(seasons)
|
||||
}) %>%
|
||||
bind_rows()
|
||||
|
||||
cat("Created", nrow(all_seasons), "season-segmented observations\n")
|
||||
cat("Total seasons:", length(unique(paste(all_seasons$field_id, all_seasons$season_id))), "\n\n")
|
||||
|
||||
# Summary by season
# One row per field/season with its date span, observation count, duration,
# maximum CI and a flag marking the ongoing (not yet harvested) season.
season_summary <- all_seasons %>%
  group_by(field_id, season_id) %>%
  summarise(
    season_start = min(season_start_date),
    season_end = max(season_end_date),
    n_observations = n(),
    # Ongoing seasons have no known duration (all NA). max(..., na.rm = TRUE)
    # on such a group would warn and return -Inf, so report NA instead.
    days_duration = if (all(is.na(days_in_season))) {
      NA_real_
    } else {
      max(days_in_season, na.rm = TRUE)
    },
    max_ci = max(mean_ci, na.rm = TRUE),
    is_current = all(is.na(season_end_date)),
    .groups = "drop"
  )
|
||||
|
||||
cat("Season summary:\n")
|
||||
print(head(season_summary, 20))
|
||||
|
||||
# ============================================================================
|
||||
# STEP 3: GROWTH CURVE ANALYSIS PER SEASON
|
||||
# ============================================================================
|
||||
|
||||
cat("\n\n=== ANALYZING GROWTH CURVES PER SEASON ===\n\n")
|
||||
|
||||
# Smoothing function (centered moving average)
#
# Arguments:
#   ci_values  Numeric vector of CI observations in time order.
#   window     Window width in observations; when the series is shorter than
#              the window it is reduced to max(3, length(ci_values)).
#
# Returns a numeric vector of the same length as ci_values. Each element is
# the mean of the values within floor(window / 2) positions on either side,
# ignoring NAs; the window is truncated at both ends of the series. A window
# that contains only NAs yields NaN.
smooth_ci <- function(ci_values, window = 15) {
  n <- length(ci_values)
  if (n < window) window <- max(3, n)
  half_window <- floor(window / 2)

  # seq_len() keeps empty input empty; 1:n would iterate over c(1, 0)
  vapply(seq_len(n), function(i) {
    start_idx <- max(1, i - half_window)
    end_idx <- min(n, i + half_window)
    mean(ci_values[start_idx:end_idx], na.rm = TRUE)
  }, numeric(1))
}
|
||||
|
||||
# Detect peak and senescence
#
# Arguments:
#   season_df  Observations of one season in time order; the columns `date`,
#              `mean_ci` and `days_since_start` are used.
#
# Returns a list with the fields peak_date, peak_ci, peak_days_since_start,
# senescence_start_date, senescence_rate, current_phase, current_ci and
# last_obs_date. The same fields and types are returned on every path so the
# per-season results can be row-bound into one table. Seasons with fewer than
# 20 observations get NA values and the phase "insufficient_data".
analyze_season_curve <- function(season_df) {
  # Result used whenever no curve can be analysed
  insufficient <- list(
    peak_date = as.Date(NA),
    peak_ci = NA_real_,
    peak_days_since_start = NA_real_,
    senescence_start_date = as.Date(NA),
    senescence_rate = NA_real_,
    current_phase = "insufficient_data",
    current_ci = NA_real_,
    last_obs_date = as.Date(NA)
  )

  if (nrow(season_df) < 20) {
    return(insufficient)
  }

  # Smooth the curve
  season_df$ci_smooth <- smooth_ci(season_df$mean_ci)

  # Find peak
  peak_idx <- which.max(season_df$ci_smooth)
  if (length(peak_idx) == 0) {
    # which.max() returns integer(0) when every smoothed value is NA/NaN;
    # the comparisons below would then fail inside if ()
    return(insufficient)
  }
  peak_date <- season_df$date[peak_idx]
  peak_ci <- season_df$ci_smooth[peak_idx]
  peak_days <- season_df$days_since_start[peak_idx]

  # Check if we're past the peak
  last_date <- max(season_df$date)
  is_post_peak <- last_date > peak_date

  # Calculate senescence rate (slope after peak); requires more than five
  # observations after the peak
  if (is_post_peak && peak_idx < nrow(season_df) - 5) {
    post_peak_data <- season_df[peak_idx:nrow(season_df), ]

    # Fit linear model to post-peak data
    lm_post <- lm(ci_smooth ~ days_since_start, data = post_peak_data)
    # Slope; unname() stops the coefficient name leaking into the result
    senescence_rate <- unname(coef(lm_post)[2])
    senescence_start <- peak_date
  } else {
    senescence_rate <- NA_real_
    senescence_start <- as.Date(NA)
  }

  # Determine current phase
  current_ci <- tail(season_df$ci_smooth, 1)

  if (is.na(current_ci)) {
    current_phase <- "unknown"
  } else if (!is_post_peak) {
    current_phase <- "growing"
  } else if (current_ci > 2.5) {
    current_phase <- "post_peak_maturing"
  } else {
    current_phase <- "declining_harvest_approaching"
  }

  list(
    peak_date = peak_date,
    peak_ci = peak_ci,
    peak_days_since_start = peak_days,
    senescence_start_date = senescence_start,
    senescence_rate = senescence_rate,
    current_phase = current_phase,
    current_ci = current_ci,
    last_obs_date = last_date
  )
}
|
||||
|
||||
# Analyze each season
|
||||
season_analysis <- all_seasons %>%
|
||||
group_by(field_id, season_id) %>%
|
||||
group_modify(~ {
|
||||
analysis <- analyze_season_curve(.x)
|
||||
as.data.frame(analysis)
|
||||
}) %>%
|
||||
ungroup()
|
||||
|
||||
# Merge with season summary
|
||||
season_results <- season_summary %>%
|
||||
left_join(season_analysis, by = c("field_id", "season_id"))
|
||||
|
||||
cat("Analyzed", nrow(season_results), "seasons\n\n")
|
||||
|
||||
# ============================================================================
|
||||
# STEP 4: HARVEST TIMING PATTERNS (Historical Analysis)
|
||||
# ============================================================================
|
||||
|
||||
cat("=== ANALYZING HISTORICAL HARVEST TIMING PATTERNS ===\n\n")
|
||||
|
||||
# Look at completed seasons only
|
||||
historical_seasons <- season_results %>%
|
||||
filter(!is_current) %>%
|
||||
mutate(
|
||||
days_peak_to_harvest = as.numeric(season_end - peak_date)
|
||||
)
|
||||
|
||||
cat("Historical season statistics (completed harvests):\n\n")
|
||||
|
||||
cat("Average days from peak to harvest:\n")
|
||||
peak_to_harvest_stats <- historical_seasons %>%
|
||||
filter(!is.na(days_peak_to_harvest)) %>%
|
||||
summarise(
|
||||
mean_days = mean(days_peak_to_harvest, na.rm = TRUE),
|
||||
median_days = median(days_peak_to_harvest, na.rm = TRUE),
|
||||
sd_days = sd(days_peak_to_harvest, na.rm = TRUE),
|
||||
min_days = min(days_peak_to_harvest, na.rm = TRUE),
|
||||
max_days = max(days_peak_to_harvest, na.rm = TRUE)
|
||||
)
|
||||
print(peak_to_harvest_stats)
|
||||
|
||||
cat("\n\nPeak CI at harvest time:\n")
|
||||
peak_ci_stats <- historical_seasons %>%
|
||||
filter(!is.na(peak_ci)) %>%
|
||||
summarise(
|
||||
mean_peak_ci = mean(peak_ci, na.rm = TRUE),
|
||||
median_peak_ci = median(peak_ci, na.rm = TRUE),
|
||||
sd_peak_ci = sd(peak_ci, na.rm = TRUE)
|
||||
)
|
||||
print(peak_ci_stats)
|
||||
|
||||
cat("\n\nSenescence rate (CI decline per day after peak):\n")
|
||||
senescence_stats <- historical_seasons %>%
|
||||
filter(!is.na(senescence_rate), senescence_rate < 0) %>%
|
||||
summarise(
|
||||
mean_rate = mean(senescence_rate, na.rm = TRUE),
|
||||
median_rate = median(senescence_rate, na.rm = TRUE),
|
||||
sd_rate = sd(senescence_rate, na.rm = TRUE)
|
||||
)
|
||||
print(senescence_stats)
|
||||
|
||||
# ============================================================================
|
||||
# STEP 5: CURRENT SEASON PREDICTIONS
|
||||
# ============================================================================
|
||||
|
||||
cat("\n\n=== PREDICTING HARVEST FOR CURRENT ONGOING SEASONS ===\n\n")
|
||||
|
||||
# Get current seasons
|
||||
current_seasons <- season_results %>%
|
||||
filter(is_current) %>%
|
||||
mutate(
|
||||
# Use historical average to predict harvest
|
||||
predicted_harvest_date = peak_date + peak_to_harvest_stats$mean_days,
|
||||
days_until_predicted_harvest = as.numeric(predicted_harvest_date - last_obs_date),
|
||||
weeks_until_predicted_harvest = days_until_predicted_harvest / 7
|
||||
)
|
||||
|
||||
cat("Current ongoing seasons (ready for harvest prediction):\n\n")
|
||||
|
||||
current_predictions <- current_seasons %>%
|
||||
mutate(
|
||||
days_since_peak = as.numeric(last_obs_date - peak_date)
|
||||
) %>%
|
||||
select(
|
||||
field_id,
|
||||
season_id,
|
||||
last_harvest = season_start,
|
||||
last_observation = last_obs_date,
|
||||
current_ci,
|
||||
current_phase,
|
||||
peak_date,
|
||||
peak_ci,
|
||||
days_since_peak,
|
||||
predicted_harvest = predicted_harvest_date,
|
||||
weeks_until_harvest = weeks_until_predicted_harvest
|
||||
) %>%
|
||||
arrange(weeks_until_harvest)
|
||||
|
||||
print(current_predictions)
|
||||
|
||||
cat("\n\nHarvest readiness assessment:\n\n")
|
||||
|
||||
harvest_alerts <- current_predictions %>%
|
||||
mutate(
|
||||
alert = case_when(
|
||||
current_ci < 2.5 & current_phase == "declining_harvest_approaching" ~ "🚨 HARVEST IMMINENT (CI < 2.5)",
|
||||
current_ci < 3.0 & weeks_until_harvest < 2 ~ "⚠️ HARVEST WITHIN 2 WEEKS",
|
||||
weeks_until_harvest < 4 ~ "💡 HARVEST WITHIN 1 MONTH",
|
||||
current_phase == "growing" ~ "✅ STILL GROWING",
|
||||
TRUE ~ "📊 MONITORING"
|
||||
)
|
||||
) %>%
|
||||
select(field_id, current_ci, current_phase, predicted_harvest, alert)
|
||||
|
||||
print(harvest_alerts)
|
||||
|
||||
# ============================================================================
|
||||
# STEP 6: VALIDATION OF PREDICTION METHOD
|
||||
# ============================================================================
|
||||
|
||||
cat("\n\n=== VALIDATING PREDICTION METHOD ON HISTORICAL DATA ===\n\n")
|
||||
|
||||
# For each historical season, predict when harvest would occur using only data up to peak
|
||||
validation_results <- historical_seasons %>%
|
||||
filter(!is.na(peak_date), !is.na(season_end)) %>%
|
||||
mutate(
|
||||
predicted_harvest = peak_date + peak_to_harvest_stats$mean_days,
|
||||
actual_harvest = season_end,
|
||||
prediction_error_days = as.numeric(predicted_harvest - actual_harvest),
|
||||
prediction_error_weeks = prediction_error_days / 7
|
||||
)
|
||||
|
||||
cat("Prediction accuracy metrics:\n\n")
|
||||
|
||||
accuracy_metrics <- validation_results %>%
|
||||
summarise(
|
||||
n_predictions = n(),
|
||||
mean_error_days = mean(abs(prediction_error_days), na.rm = TRUE),
|
||||
median_error_days = median(abs(prediction_error_days), na.rm = TRUE),
|
||||
rmse_days = sqrt(mean(prediction_error_days^2, na.rm = TRUE)),
|
||||
within_2_weeks = sum(abs(prediction_error_weeks) <= 2, na.rm = TRUE),
|
||||
pct_within_2_weeks = 100 * sum(abs(prediction_error_weeks) <= 2, na.rm = TRUE) / n()
|
||||
)
|
||||
|
||||
print(accuracy_metrics)
|
||||
|
||||
cat("\n\nSample predictions vs actual:\n")
|
||||
print(validation_results %>%
|
||||
select(field_id, season_id, peak_date, predicted_harvest, actual_harvest,
|
||||
prediction_error_weeks) %>%
|
||||
head(15))
|
||||
|
||||
# ============================================================================
|
||||
# SUMMARY
|
||||
# ============================================================================
|
||||
|
||||
cat("\n\n=== OPERATIONAL HARVEST PREDICTION SUMMARY ===\n\n")
|
||||
|
||||
cat("METHODOLOGY:\n")
|
||||
cat("1. Segment CI time series by harvest dates (each season = planting to harvest)\n")
|
||||
cat("2. Smooth CI data to identify peak (maturity point)\n")
|
||||
cat("3. Historical pattern: Average", round(peak_to_harvest_stats$mean_days), "days from peak to harvest\n")
|
||||
cat("4. Current season prediction: Peak date +", round(peak_to_harvest_stats$mean_days), "days\n\n")
|
||||
|
||||
cat("PREDICTION ACCURACY (Historical Validation):\n")
|
||||
cat(" - Mean absolute error:", round(accuracy_metrics$mean_error_days), "days\n")
|
||||
cat(" - RMSE:", round(accuracy_metrics$rmse_days), "days\n")
|
||||
cat(" - Accuracy within 2 weeks:", round(accuracy_metrics$pct_within_2_weeks), "%\n\n")
|
||||
|
||||
cat("HARVEST TRIGGER (Operational Rule):\n")
|
||||
cat(" - Primary: CI drops below 2.5 while in declining phase\n")
|
||||
cat(" - Secondary: Predicted harvest date approaches (±2 weeks)\n")
|
||||
cat(" - Confirmation: Visual inspection when both conditions met\n\n")
|
||||
|
||||
cat("FIELDS READY FOR HARVEST NOW:\n")
|
||||
ready_now <- harvest_alerts %>%
|
||||
filter(grepl("IMMINENT|WITHIN 2 WEEKS", alert))
|
||||
|
||||
if (nrow(ready_now) > 0) {
|
||||
print(ready_now)
|
||||
} else {
|
||||
cat(" No fields at immediate harvest stage\n")
|
||||
}
|
||||
|
||||
cat("\n=== ANALYSIS COMPLETE ===\n")
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
# SmartCane - Git Push to Bitbucket
# Run this script to commit and push all changes
# Stages everything, commits with a fixed message, then asks for
# confirmation before pushing the current branch to origin.

# Step 1: Check current status
Write-Host "=== Current Git Status ===" -ForegroundColor Cyan
git status

# Step 2: Add all new and modified files
Write-Host "`n=== Adding Files ===" -ForegroundColor Cyan
git add -A

# Step 3: Show what will be committed
Write-Host "`n=== Files to be committed ===" -ForegroundColor Cyan
git status

# Step 4: Commit with descriptive message
Write-Host "`n=== Committing Changes ===" -ForegroundColor Cyan
$commitMessage = @"
Add KPI reporting system and deployment documentation

Major Changes:
- NEW: Scripts 09 & 10 for KPI calculation and enhanced reporting
- NEW: Shell script wrappers (01-10) for easier execution
- NEW: R packages flextable and officer for enhanced Word reports
- NEW: DEPLOYMENT_README.md with complete deployment guide
- RENAMED: Numbered R scripts (02, 03, 04) for clarity
- REMOVED: Old package management scripts (using renv only)
- UPDATED: Workflow now uses scripts 09->10 instead of 05

Files Changed: 90+ files
New Packages: flextable, officer
New Scripts: 09_run_calculate_kpis.sh, 10_run_kpi_report.sh
Documentation: DEPLOYMENT_README.md, EMAIL_TO_ADMIN.txt

See DEPLOYMENT_README.md for full deployment instructions.
"@

git commit -m $commitMessage

# git commit exits non-zero when there is nothing to commit or a hook fails.
# Earlier local commits may still need pushing, so warn and carry on.
if ($LASTEXITCODE -ne 0) {
    Write-Host "`n[WARNING] git commit did not create a commit (exit code $LASTEXITCODE)." -ForegroundColor Yellow
}

# Step 5: Push to Bitbucket
# Get current branch name once and reuse it below
$branch = git branch --show-current

Write-Host "`n=== Ready to Push ===" -ForegroundColor Yellow
Write-Host "Current branch: " -NoNewline
Write-Host $branch

Write-Host "`nDo you want to push to Bitbucket? (Y/N): " -ForegroundColor Yellow -NoNewline
$confirmation = Read-Host

# -eq is case-insensitive for strings, so this accepts both 'Y' and 'y'
if ($confirmation -eq 'Y') {
    Write-Host "`n=== Pushing to Bitbucket ===" -ForegroundColor Green

    # Push to origin
    git push origin $branch

    # Only report success when git actually succeeded
    if ($LASTEXITCODE -eq 0) {
        Write-Host "`n[SUCCESS] Pushed to Bitbucket!" -ForegroundColor Green
        Write-Host "`nNext steps:" -ForegroundColor Cyan
        Write-Host "1. Send EMAIL_TO_ADMIN.txt to your administrator"
        Write-Host "2. Ensure they have access to the Bitbucket repository"
        Write-Host "3. Monitor deployment and test on Linux server"
        Write-Host "4. Update Laravel UI with Script 10 parameters"
    } else {
        Write-Host "`n[ERROR] git push failed (exit code $LASTEXITCODE). Nothing was pushed." -ForegroundColor Red
    }

} else {
    Write-Host "`n[CANCELLED] Push cancelled. Run 'git push origin $branch' when ready." -ForegroundColor Yellow
}

Write-Host "`n=== Summary ===" -ForegroundColor Cyan
Write-Host "Deployment guide: DEPLOYMENT_README.md"
Write-Host "Admin email: EMAIL_TO_ADMIN.txt"
Write-Host "New scripts: 09_run_calculate_kpis.sh, 10_run_kpi_report.sh"
Write-Host "New packages: flextable, officer"
|
||||
|
|
@ -51,6 +51,7 @@ from harvest_date_pred_utils import (
|
|||
def main():
|
||||
# Get project name from command line or use default
|
||||
project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
|
||||
field_filter = sys.argv[2] if len(sys.argv) > 2 else None # Optional: test single field
|
||||
|
||||
# Construct paths
|
||||
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
|
||||
|
|
@ -71,6 +72,8 @@ def main():
|
|||
|
||||
print("="*80)
|
||||
print(f"HARVEST DATE PREDICTION - LSTM MODEL 307 ({project_name})")
|
||||
if field_filter:
|
||||
print(f"TEST MODE: Single field ({field_filter})")
|
||||
print("="*80)
|
||||
|
||||
# [1/4] Load model
|
||||
|
|
@ -82,14 +85,31 @@ def main():
|
|||
# [2/4] Load and prepare CI data
|
||||
print("\n[2/4] Loading CI data...")
|
||||
print(f" From: {CI_DATA_FILE}")
|
||||
ci_data = pd.read_csv(CI_DATA_FILE)
|
||||
ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str}) # Force field as string
|
||||
ci_data['Date'] = pd.to_datetime(ci_data['Date'])
|
||||
print(f" Loaded {len(ci_data)} daily rows across {ci_data['field'].nunique()} fields")
|
||||
print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}")
|
||||
|
||||
# Optional: Filter to single field for testing
|
||||
if field_filter:
|
||||
field_filter = str(field_filter) # Ensure field_filter is string
|
||||
ci_data_filtered = ci_data[ci_data['field'] == field_filter]
|
||||
if len(ci_data_filtered) == 0:
|
||||
print(f"\n✗ ERROR: No data found for field '{field_filter}'")
|
||||
available_fields = sorted(ci_data['field'].unique())
|
||||
print(f" Available fields ({len(available_fields)}): {', '.join(available_fields[:10])}")
|
||||
if len(available_fields) > 10:
|
||||
print(f" ... and {len(available_fields) - 10} more")
|
||||
return
|
||||
ci_data = ci_data_filtered
|
||||
print(f" ✓ Filtered to single field: {field_filter}")
|
||||
print(f" Data points: {len(ci_data)} days")
|
||||
|
||||
# [3/4] Run model predictions with two-step detection
|
||||
print("\n[3/4] Running two-step harvest detection...")
|
||||
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device)
|
||||
print(" (Using threshold=0.45, consecutive_days=2 - tuned for Model 307 output)")
|
||||
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
||||
phase1_threshold=0.45, phase1_consecutive=2)
|
||||
|
||||
# Build and export
|
||||
print("\nBuilding production harvest table...")
|
||||
|
|
@ -102,10 +122,11 @@ def main():
|
|||
print(f" Input: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
|
||||
print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
|
||||
print(f"\nColumn structure:")
|
||||
print(f" field, sub_field, season, year, season_start_date, season_end_date, phase1_harvest_date")
|
||||
print(f" field, sub_field, season, season_start_date, season_end_date, phase2_harvest_date")
|
||||
print(f"\nNext steps:")
|
||||
print(f" 1. Review baseline predictions in harvest_production_export.xlsx")
|
||||
print(f" 2. Run weekly monitoring: python 02_harvest_imminent_weekly.py {project_name}")
|
||||
print(f" 1. Review predictions in harvest_production_export.xlsx")
|
||||
print(f" 2. Run weekly monitoring: python 31_harvest_imminent_weekly.py {project_name}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -271,11 +271,17 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
|
|||
return df
|
||||
|
||||
|
||||
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device):
|
||||
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
||||
threshold=0.45, consecutive_days=2):
|
||||
"""
|
||||
Phase 1: Growing window detection with threshold crossing.
|
||||
Expand window day-by-day, check last timestep's detected_prob.
|
||||
When 3 consecutive days have prob > 0.5, harvest detected.
|
||||
When N consecutive days have prob > threshold, harvest detected.
|
||||
|
||||
Args:
|
||||
threshold (float): Probability threshold (default 0.45, tuned for Model 307)
|
||||
consecutive_days (int): Required consecutive days above threshold (default 2, reduced from 3 for robustness)
|
||||
|
||||
Returns list of (harvest_date, harvest_idx) tuples.
|
||||
"""
|
||||
harvest_dates = []
|
||||
|
|
@ -306,18 +312,18 @@ def run_phase1_growing_window(field_data, model, config, scalers, ci_column, dev
|
|||
# Check LAST timestep
|
||||
last_prob = detected_probs[-1]
|
||||
|
||||
if last_prob > 0.5:
|
||||
if last_prob > threshold:
|
||||
consecutive_above_threshold += 1
|
||||
else:
|
||||
consecutive_above_threshold = 0
|
||||
|
||||
# Harvest detected: 3 consecutive days above threshold
|
||||
if consecutive_above_threshold >= 3:
|
||||
harvest_date = field_data.iloc[current_pos + window_end - 3]['Date']
|
||||
harvest_dates.append((harvest_date, current_pos + window_end - 3))
|
||||
# Harvest detected: N consecutive days above threshold
|
||||
if consecutive_above_threshold >= consecutive_days:
|
||||
harvest_date = field_data.iloc[current_pos + window_end - consecutive_days]['Date']
|
||||
harvest_dates.append((harvest_date, current_pos + window_end - consecutive_days))
|
||||
|
||||
# Reset to next day after harvest
|
||||
current_pos = current_pos + window_end - 2
|
||||
current_pos = current_pos + window_end - consecutive_days + 1
|
||||
break
|
||||
|
||||
except Exception:
|
||||
|
|
@ -391,12 +397,17 @@ def run_phase2_refinement(field_data, phase1_harvests, model, config, scalers, c
|
|||
return refined_harvests
|
||||
|
||||
|
||||
def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=None):
|
||||
def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=None,
|
||||
phase1_threshold=0.45, phase1_consecutive=2):
|
||||
"""
|
||||
Two-step harvest detection for each field:
|
||||
1. Phase 1: Growing window with 3-day threshold confirmation
|
||||
1. Phase 1: Growing window with threshold confirmation
|
||||
2. Phase 2: ±40 day refinement with argmax
|
||||
|
||||
Args:
|
||||
phase1_threshold (float): Probability threshold for Phase 1 (default 0.45, tuned for Model 307)
|
||||
phase1_consecutive (int): Consecutive days required (default 2, reduced from 3 for robustness)
|
||||
|
||||
Returns list of dicts with field, season_start_date, season_end_date, etc.
|
||||
"""
|
||||
if device is None:
|
||||
|
|
@ -411,6 +422,7 @@ def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=Non
|
|||
harvests_found = 0
|
||||
|
||||
print(f" Processing {total_fields} fields...")
|
||||
print(f" Phase 1 parameters: threshold={phase1_threshold}, consecutive_days={phase1_consecutive}")
|
||||
|
||||
for idx, (field, field_data) in enumerate(field_groups, 1):
|
||||
# Simple progress indicator
|
||||
|
|
@ -423,7 +435,8 @@ def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=Non
|
|||
field_data = field_data.sort_values('Date').reset_index(drop=True)
|
||||
|
||||
# Phase 1: Growing window detection
|
||||
phase1_harvests = run_phase1_growing_window(field_data, model, config, scalers, ci_column, device)
|
||||
phase1_harvests = run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
||||
threshold=phase1_threshold, consecutive_days=phase1_consecutive)
|
||||
|
||||
if not phase1_harvests:
|
||||
continue
|
||||
|
|
@ -475,7 +488,7 @@ def build_production_harvest_table(refined_results: List[Dict]) -> pd.DataFrame:
|
|||
# Ensure date columns are datetime
|
||||
df['season_start_date'] = pd.to_datetime(df['season_start_date']).dt.strftime('%Y-%m-%d')
|
||||
df['season_end_date'] = pd.to_datetime(df['season_end_date']).dt.strftime('%Y-%m-%d')
|
||||
df['phase1_harvest_date'] = pd.to_datetime(df['phase1_harvest_date']).dt.strftime('%Y-%m-%d')
|
||||
df['phase2_harvest_date'] = pd.to_datetime(df['phase2_harvest_date']).dt.strftime('%Y-%m-%d')
|
||||
|
||||
print(f"Built production table with {len(df)} field/season combinations")
|
||||
|
||||
|
|
|
|||
|
|
@ -402,4 +402,32 @@ cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
|
|||
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
|
||||
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
|
||||
|
||||
# ----------------------------------------------------------------------------
# Tiling configuration metadata
# ----------------------------------------------------------------------------
# Persist the tiling decision taken by this script in tiling_config.json.
# parameters_project.R reads that file to pick the mosaic mode, so script 40
# can reuse the decision made here instead of re-computing it.

cat("\n[10] Writing tiling configuration metadata...\n")

# tolower() turns the R logical into the lower-case literal JSON expects.
has_tiles_json <- tolower(N_TILES > 1)

config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")

# The document is assembled by hand, one entry per line. Values are inserted
# verbatim (no JSON escaping is applied). The trailing "" keeps the newline
# after the closing brace.
config_lines <- c(
  "{",
  paste0(' "project": "', PROJECT, '",'),
  paste0(' "has_tiles": ', has_tiles_json, ","),
  paste0(' "grid_size": "', GRID_SIZE_LABEL, '",'),
  paste0(' "grid_rows": ', GRID_NROWS, ","),
  paste0(' "grid_cols": ', GRID_NCOLS, ","),
  paste0(' "roi_width_km": ', round(x_range_m / 1000, 1), ","),
  paste0(' "roi_height_km": ', round(y_range_m / 1000, 1), ","),
  paste0(' "created_date": "', Sys.Date(), '",'),
  paste0(' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"'),
  "}",
  ""
)
config_json <- paste(config_lines, collapse = "\n")

writeLines(config_json, config_file)

# Echo the two values downstream scripts care about.
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", has_tiles_json, "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")
|
||||
|
||||
cat("\n✓ Script complete!\n")
|
||||
|
|
|
|||
|
|
@ -884,13 +884,8 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
|
|||
names(blue_band) <- "Blue"
|
||||
names(nir_band) <- "NIR"
|
||||
|
||||
# Create CI band
|
||||
if (raster_info$type == "4b") {
|
||||
ci_band <- (nir_band - red_band) / (nir_band + red_band)
|
||||
} else if (raster_info$type == "8b") {
|
||||
red_edge <- tile_rast[[raster_info$red_idx]]
|
||||
ci_band <- (nir_band - red_edge) / (nir_band + red_edge)
|
||||
}
|
||||
# Create CI band inline: NIR/Green - 1
|
||||
ci_band <- nir_band / green_band - 1
|
||||
names(ci_band) <- "CI"
|
||||
|
||||
# Create output raster with Red, Green, Blue, NIR, CI
|
||||
|
|
@ -1018,33 +1013,3 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
|
|||
|
||||
return(aggregated)
|
||||
}
|
||||
|
||||
#' Create CI band from available bands (if not pre-computed)
#'
#' Computes a normalized difference between the NIR layer and the layer
#' addressed by `raster_info$red_idx`: `(NIR - ref) / (NIR + ref)`. The
#' arithmetic is the same for "4b" and "8b" rasters; only the layer that
#' `red_idx` points at differs.
#'
#' NOTE(review): elsewhere in this changeset CI is computed as
#' `NIR / Green - 1`; this helper still uses the normalized-difference form.
#' Confirm which formula is intended before relying on it.
#'
#' @param raster Loaded raster object; layers are extracted with `[[`.
#' @param raster_info Output from detect_raster_structure(). Fields read here:
#'   `type` ("4b" or "8b"), `red_idx`, `nir_idx` and, optionally, `udm_idx`
#'   (`NA` or absent when there is no UDM layer).
#' @return Single-layer raster with CI band. Stops with
#'   "Unsupported raster type" for any other `type`.
#'
create_ci_band <- function(raster, raster_info) {
  # isTRUE() also covers a missing (NULL) type, which would otherwise fail
  # inside `if` with an unrelated "argument is of length zero" error.
  if (!isTRUE(raster_info$type %in% c("4b", "8b"))) {
    stop("Unsupported raster type")
  }

  reference_band <- raster[[raster_info$red_idx]]
  nir <- raster[[raster_info$nir_idx]]
  ci <- (nir - reference_band) / (nir + reference_band)

  # Apply the UDM mask only when a usable index is present. A NULL index
  # (field not set) is treated the same as NA instead of raising an error.
  udm_idx <- raster_info$udm_idx
  has_udm <- !is.null(udm_idx) && length(udm_idx) == 1L && !is.na(udm_idx)
  if (has_udm) {
    # maskvalues = 0: cells where the UDM layer equals 0 are masked out.
    ci <- terra::mask(ci, raster[[udm_idx]], maskvalues = 0)
  }

  ci
}
|
||||
|
|
@ -127,11 +127,23 @@ main <- function() {
|
|||
|
||||
safe_log(paste("Output will be saved as:", file_name_tif))
|
||||
|
||||
# 5. Create weekly per-tile MAX mosaics
|
||||
# ----------------------------------
|
||||
# 5. Create weekly mosaics - route based on project tile detection
|
||||
# ---------------------------------------------------------------
|
||||
# The use_tile_mosaic flag is auto-detected by parameters_project.R
|
||||
# based on whether tiles exist in merged_final_tif/
|
||||
|
||||
if (!exists("use_tile_mosaic")) {
|
||||
# Fallback detection if flag not set (shouldn't happen)
|
||||
merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
|
||||
tile_detection <- detect_mosaic_mode(merged_final_dir)
|
||||
use_tile_mosaic <- tile_detection$has_tiles
|
||||
}
|
||||
|
||||
if (use_tile_mosaic) {
|
||||
# TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
|
||||
# This is used for projects like Angata with large ROIs requiring spatial partitioning
|
||||
tryCatch({
|
||||
safe_log("Starting per-tile mosaic creation...")
|
||||
safe_log("Starting per-tile mosaic creation (tile-based approach)...")
|
||||
|
||||
# Set output directory for per-tile mosaics
|
||||
tile_output_base <- file.path(laravel_storage, "weekly_tile_max")
|
||||
|
|
@ -146,10 +158,37 @@ main <- function() {
|
|||
safe_log(paste("✓ Per-tile mosaic creation completed - created",
|
||||
length(created_tile_files), "tile files"))
|
||||
}, error = function(e) {
|
||||
safe_log(paste("ERROR in mosaic creation:", e$message), "WARNING")
|
||||
safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
|
||||
traceback()
|
||||
stop("Mosaic creation failed")
|
||||
})
|
||||
|
||||
} else {
|
||||
# SINGLE-FILE APPROACH: Create single weekly mosaic file
|
||||
# This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output
|
||||
tryCatch({
|
||||
safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
|
||||
|
||||
# Set output directory for single-file mosaics
|
||||
single_file_output_dir <- file.path(laravel_storage, "weekly_mosaic")
|
||||
|
||||
created_file <- create_weekly_mosaic(
|
||||
dates = dates,
|
||||
field_boundaries = field_boundaries,
|
||||
daily_vrt_dir = daily_vrt,
|
||||
merged_final_dir = merged_final,
|
||||
output_dir = single_file_output_dir,
|
||||
file_name_tif = file_name_tif,
|
||||
create_plots = FALSE
|
||||
)
|
||||
|
||||
safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
|
||||
}, error = function(e) {
|
||||
safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
|
||||
traceback()
|
||||
stop("Mosaic creation failed")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (sys.nframe() == 0) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,38 @@
|
|||
# Utility functions for creating weekly mosaics from daily satellite imagery.
|
||||
# These functions support cloud cover assessment, date handling, and mosaic creation.
|
||||
|
||||
#' Detect whether a project uses tile-based or single-file mosaic approach
#'
#' Detection is purely name-based: a project counts as tile-based as soon as
#' one `.tif` in the directory ends in `_XX.tif` (XX = exactly two digits),
#' e.g. `2025-11-27_00.tif` or `week_50_2024_00.tif`.
#'
#' @param merged_final_tif_dir Directory containing merged_final_tif files
#' @return List with has_tiles (logical), detected_tiles (vector of matching
#'   file names), total_files (count of .tif files) and source (string naming
#'   how the result was reached: "directory_not_found", "no_files_found" or
#'   "file_pattern_detection")
#'
detect_mosaic_mode <- function(merged_final_tif_dir) {
  # Shared shape for the "nothing to inspect" outcomes.
  no_tiles <- function(reason) {
    list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = reason
    )
  }

  if (!dir.exists(merged_final_tif_dir)) {
    return(no_tiles("directory_not_found"))
  }

  # Bare file names are enough; only the name pattern is inspected.
  tif_files <- list.files(
    merged_final_tif_dir,
    pattern = "\\.tif$",
    full.names = FALSE
  )
  if (length(tif_files) == 0) {
    return(no_tiles("no_files_found"))
  }

  # Tile naming pattern: *_XX.tif (where XX is 2 digits)
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]

  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
|
||||
|
||||
#' Safe logging function
|
||||
#' @param message The message to log
|
||||
#' @param level The log level (default: "INFO")
|
||||
|
|
|
|||
|
|
@ -13,8 +13,81 @@ suppressPackageStartupMessages({
|
|||
library(sf)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(jsonlite) # For reading tiling_config.json
|
||||
})
|
||||
|
||||
# 2. Smart detection for tile-based vs single-file mosaic approach
|
||||
# ----------------------------------------------------------------
|
||||
# Decide whether a project uses tile-based or single-file mosaics.
#
# Arguments:
#   merged_final_tif_dir   Directory whose .tif file names are inspected when
#                          no usable metadata file is found.
#   daily_tiles_split_dir  Optional directory searched recursively for
#                          tiling_config.json (written by script 10).
#
# Returns a list with has_tiles, detected_tiles, total_files and source; when
# the metadata file is used the list also carries grid_size.
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # Local NULL-default helper. `%||%` is only part of base R from 4.4.0; on
  # older versions (unless another attached package supplies it) the missing
  # operator would raise inside the tryCatch below and the metadata would be
  # silently ignored.
  or_default <- function(value, default) {
    if (is.null(value)) default else value
  }

  # Shared shape for the "nothing to inspect" outcomes of the fallback.
  no_tiles <- function(reason) {
    list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = reason
    )
  }

  # PRIORITY 1: Check for tiling_config.json metadata file from script 10
  # This is the most reliable source since script 10 explicitly records its decision
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    # The config may sit in any grid-size subfolder, hence recursive = TRUE.
    config_files <- list.files(
      daily_tiles_split_dir,
      pattern = "tiling_config\\.json$",
      recursive = TRUE,
      full.names = TRUE
    )

    if (length(config_files) > 0) {
      # Several configs may exist; the most recently modified one wins.
      config_file <- config_files[which.max(file.info(config_files)$mtime)]

      config_json <- tryCatch(
        jsonlite::read_json(config_file),
        error = function(e) {
          warning("Error reading tiling_config.json: ", conditionMessage(e))
          NULL
        }
      )

      if (is.list(config_json)) {
        return(list(
          has_tiles = or_default(config_json$has_tiles, TRUE),
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = or_default(config_json$grid_size, "unknown")
        ))
      }
      if (!is.null(config_json)) {
        # Parsed, but not a JSON object: nothing to read the fields from.
        warning("Error reading tiling_config.json: content is not a JSON object")
      }
      # Fall through to file-based detection
    }
  }

  # PRIORITY 2: File-based detection (fallback if metadata not found)
  # Check if merged_final_tif/ contains tile-named files
  if (!dir.exists(merged_final_tif_dir)) {
    return(no_tiles("directory_not_found"))
  }

  tif_files <- list.files(
    merged_final_tif_dir,
    pattern = "\\.tif$",
    full.names = FALSE
  )
  if (length(tif_files) == 0) {
    return(no_tiles("no_files_found"))
  }

  # Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits)
  # Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]

  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
|
||||
|
||||
# 2. Define project directory structure
|
||||
# -----------------------------------
|
||||
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
|
||||
|
|
@ -26,6 +99,16 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
|
|||
# Alternative: merged_tif for 4-band legacy data
|
||||
merged_tif_folder <- here(laravel_storage_dir, data_source)
|
||||
|
||||
# Detect tile mode based on metadata from script 10 or file patterns
|
||||
merged_final_dir <- here(laravel_storage_dir, "merged_final_tif")
|
||||
daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
|
||||
|
||||
tile_detection <- detect_mosaic_mode(
|
||||
merged_final_tif_dir = merged_final_dir,
|
||||
daily_tiles_split_dir = daily_tiles_split_dir
|
||||
)
|
||||
use_tile_mosaic <- tile_detection$has_tiles
|
||||
|
||||
# Main subdirectories
|
||||
dirs <- list(
|
||||
reports = here(laravel_storage_dir, "reports"),
|
||||
|
|
@ -33,7 +116,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
|
|||
data = here(laravel_storage_dir, "Data"),
|
||||
tif = list(
|
||||
merged = merged_tif_folder, # Use data_source parameter to select folder
|
||||
final = here(laravel_storage_dir, "merged_final_tif")
|
||||
final = merged_final_dir
|
||||
),
|
||||
weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"),
|
||||
weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"),
|
||||
|
|
@ -61,9 +144,17 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
|
|||
merged_final = dirs$tif$final,
|
||||
daily_CI_vals_dir = dirs$extracted_ci$daily,
|
||||
cumulative_CI_vals_dir = dirs$extracted_ci$cumulative,
|
||||
weekly_CI_mosaic = dirs$weekly_mosaic,
|
||||
weekly_CI_mosaic = if (use_tile_mosaic) dirs$weekly_tile_max else dirs$weekly_mosaic, # SMART: Route based on tile detection
|
||||
daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction
|
||||
weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output)
|
||||
use_tile_mosaic = use_tile_mosaic, # Flag indicating if tiles are used for this project
|
||||
tile_detection_info = list(
|
||||
has_tiles = tile_detection$has_tiles,
|
||||
detected_source = tile_detection$source,
|
||||
detected_count = tile_detection$total_files,
|
||||
grid_size = tile_detection$grid_size %||% "unknown",
|
||||
sample_tiles = head(tile_detection$detected_tiles, 3)
|
||||
),
|
||||
harvest_dir = dirs$harvest,
|
||||
extracted_CI_dir = dirs$extracted_ci$base
|
||||
))
|
||||
|
|
@ -329,8 +420,20 @@ if (exists("project_dir")) {
|
|||
# Expose all variables to the global environment
|
||||
list2env(project_config, envir = .GlobalEnv)
|
||||
|
||||
# Log project initialization completion
|
||||
# Log project initialization completion with tile mode info
|
||||
log_message(paste("Project initialized with directory:", project_dir))
|
||||
if (exists("use_tile_mosaic")) {
|
||||
mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
|
||||
log_message(paste("Mosaic mode detected:", mosaic_mode))
|
||||
if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
|
||||
log_message(paste(" - Detection source:", tile_detection_info$detected_source))
|
||||
log_message(paste(" - Grid size:", tile_detection_info$grid_size))
|
||||
log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
|
||||
if (length(tile_detection_info$sample_tiles) > 0) {
|
||||
log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -703,9 +703,30 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
|||
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
|
||||
target_year <- lubridate::isoyear(target_date)
|
||||
|
||||
# Generate the file path for the target week
|
||||
# Primary approach: Try single-file mosaic path first
|
||||
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
|
||||
|
||||
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
|
||||
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
|
||||
# Try to locate tile-based mosaics in weekly_tile_max instead
|
||||
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
|
||||
|
||||
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
|
||||
if (dir.exists(tile_mosaic_path)) {
|
||||
tile_files <- list.files(tile_mosaic_path,
|
||||
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
|
||||
full.names = TRUE)
|
||||
|
||||
if (length(tile_files) > 0) {
|
||||
# Found tiles - return the first tile as primary, note that multiple tiles exist
|
||||
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
|
||||
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
|
||||
# Return first tile - caller should aggregate if needed
|
||||
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Log the path calculation
|
||||
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
# Wrapper script to set project_dir and run KPI calculation
|
||||
project_dir <- "esa"
|
||||
source("r_app/09_calculate_kpis.R")
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
# Simple SAR Data Test
# ====================
# Smoke test: load one weekly SAR mosaic with terra and print a few summary
# figures, so a missing file or unreadable raster is noticed before the full
# analysis is run.

cat("Testing SAR data loading...\n")

# terra is the only package this check needs.
library(terra, quietly = TRUE)

# Single mosaic used for the check (path is relative to the working directory).
sar_file <- "python_scripts/data/aura/weekly_SAR_mosaic/week_33_2025_VV_dB_filtered.tif"

if (!file.exists(sar_file)) {
  cat("✗ SAR file not found:", sar_file, "\n")
} else {
  cat("✓ SAR file found:", sar_file, "\n")

  # Read the raster, then report its geometry.
  sar_data <- rast(sar_file)
  cat("✓ SAR data loaded successfully\n")

  raster_dims <- dim(sar_data)
  cat(" Dimensions:", raster_dims, "\n")

  raster_crs <- crs(sar_data)
  cat(" CRS:", raster_crs, "\n")

  # Only the first column of the global() summary is printed here.
  range_summary <- global(sar_data, range, na.rm = TRUE)
  cat(" Value range:", round(range_summary[, 1], 2), "dB\n")

  # Whole-raster mean and standard deviation, ignoring NA cells.
  mean_val <- global(sar_data, mean, na.rm = TRUE)[1, 1]
  sd_val <- global(sar_data, sd, na.rm = TRUE)[1, 1]

  cat(" Mean backscatter:", round(mean_val, 2), "dB\n")
  cat(" Standard deviation:", round(sd_val, 2), "dB\n")

  cat("\n✓ SAR data test successful!\n")
  cat("Ready to proceed with full analysis.\n")
}
|
||||
Loading…
Reference in a new issue