Cleanup: Fix CI formula, reorganize shell scripts and test files

- Fixed CI calculation: changed from NDVI (NIR-Red)/(NIR+Red) to correct NIR/Green-1 formula in:
  * process_single_tile() function
  * create_ci_band() utility function
  * Updated create_mask_and_crop() documentation

- Renamed numbered shell scripts for clarity (matching R script numbering):
  * 01_run_planet_download -> 10_planet_download.sh
  * 02_run_ci_extraction -> 20_ci_extraction.sh
  * 03_run_growth_model -> 30_growth_model.sh
  * 04_run_mosaic_creation -> 40_mosaic_creation.sh
  * 09_run_calculate_kpis -> 80_calculate_kpis.sh
  * 10_run_kpi_report -> 90_kpi_report.sh

- Archived obsolete shell scripts to old_sh/:
  * build_mosaic.sh, build_report.sh, interpolate_growth_model.sh
  * 05_run_dashboard_report.sh, 06_run_crop_messaging.sh
  * 11_run_yield_prediction.sh/ps1
  * runcane.sh, runpython.sh, smartcane.sh, update_RDS.sh

- Deleted test/debug files and temporary outputs:
  * analyze_*.R, benchmark_gpu_vs_cpu.py, convert_angata_harvest.py
  * debug_mosaic.R, examine_kpi_results.R, generate_sar_report.R
  * inspect_8band_structure.R, inspect_tif_bands.R
  * old_working_utils.R, predict_harvest_operational.R
  * run_kpi_calculation.R, run_report.R, simple_sar_test.R
  * data_validation_tool/, harvest_ci_pattern_analysis.png, kpi_debug.out

- Enhanced harvest prediction: Added threshold tuning (0.40-0.45) and field type handling

- Enhanced mosaic creation: Improved tile detection and routing logic
This commit is contained in:
Timon 2026-01-14 16:58:51 +01:00
parent d365b5838b
commit 458b8247be
52 changed files with 521 additions and 37062 deletions

512
.Rhistory
View file

@ -1,512 +0,0 @@
message("No project_dir provided. Using default:", project_dir)
}
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# Initialize project configuration and load utility functions
# First try sourcing from the working directory, then fall back to r_app/.
tryCatch({
source("parameters_project.R")
source("growth_model_utils.R")
}, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "growth_model_utils.R"))
# NOTE(review): warning() used here to report success — consider message().
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.")
})
})
log_message("Starting CI growth model interpolation")
# Load and process the data
# Pipeline: load CI values -> validate harvest data -> interpolate per
# year/field -> compute growth metrics -> persist as RDS.
tryCatch({
# Load the combined CI data
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
# Validate harvesting data
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
stop("No harvesting data available")
}
# Get the years from harvesting data
# Only seasons with a known start date participate in interpolation.
years <- harvesting_data %>%
filter(!is.na(season_start)) %>%
distinct(year) %>%
pull(year)
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
# Generate interpolated CI data for each year and field
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
# Calculate growth metrics and save the results
if (nrow(CI_all) > 0) {
# Add daily and cumulative metrics
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
# Save the processed data
save_growth_model(
CI_all_with_metrics,
cumulative_CI_vals_dir,
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
} else {
log_message("No CI data was generated after interpolation", level = "WARNING")
}
log_message("Growth model interpolation completed successfully")
}, error = function(e) {
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
stop(e$message)
})
View(CI_all_with_metrics)
View(CI_data)
# Get the years from harvesting data
years <- harvesting_data %>%
filter(!is.na(season_start)) %>%
distinct(year) %>%
pull(year)
years
View(CI_all)
View(CI_all_with_metrics)
years
harvesting_data
ci_data
ci_data = CI_data
# Process each year
# Builds one combined data frame of interpolated CI points across all years.
# NOTE(review): this console command captures `harvesting_data` and `ci_data`
# from the global session environment rather than taking them as arguments.
result <- purrr::map_df(years, function(yr) {
safe_log(paste("Processing year:", yr))
# Get the fields harvested in this year with valid season start dates
sub_fields <- harvesting_data %>%
dplyr::filter(year == yr, !is.na(season_start)) %>%
dplyr::pull(sub_field)
if (length(sub_fields) == 0) {
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
# Empty frame keeps map_df's row-binding well-defined for skipped years.
return(data.frame())
}
# Filter sub_fields to only include those with value data in ci_data
valid_sub_fields <- sub_fields %>%
purrr::keep(~ any(ci_data$sub_field == .x))
if (length(valid_sub_fields) == 0) {
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
return(data.frame())
}
# Extract and interpolate data for each valid field
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr)) %>%
purrr::list_rbind()
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
return(result)
})
CI_all_with_metrics
# Drop duplicated DOY rows for one specific field/season combination
# (00F25 / 2023) before computing metrics.
# NOTE(review): group_by() is never followed by ungroup(), so CI_all stays
# grouped by Date/field/season downstream — confirm calculate_growth_metrics()
# tolerates a grouped data frame.
CI_all <- CI_all %>%
group_by(Date, field, season) %>%
filter(!(field == "00F25" & season == 2023 & duplicated(DOY)))
View(CI_all)
# Add daily and cumulative metrics
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
# Save the processed data
save_growth_model(
CI_all_with_metrics,
cumulative_CI_vals_dir,
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
ci_plot_type <- params$ci_plot_type
colorblind_friendly <- params$colorblind_friendly
facet_by_season <- params$facet_by_season
x_axis_unit <- params$x_axis_unit
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Load all packages at once with suppressPackageStartupMessages
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(exactextractr)
library(tidyverse)
library(tmap)
library(lubridate)
library(zoo)
library(rsample)
library(caret)
library(randomForest)
library(CAST)
library(knitr)
library(tidyr)
})
# Load custom utility functions
tryCatch({
source("report_utils.R")
}, error = function(e) {
message(paste("Error loading report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "report_utils.R"))
}, error = function(e) {
stop("Could not load report_utils.R from either location: ", e$message)
})
})
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Log initial configuration
safe_log("Starting the R Markdown script with KPIs")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis")
date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables.rds"),
"kpi_summary_tables.rds",
paste0("kpi_summary_tables_", date_suffix, ".rds")
)
expected_field_details_names <- c(
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
paste0(project_dir, "_field_details.rds"),
"field_details.rds"
)
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
# Return the full path of the first candidate filename that exists inside
# `dir`, or NULL when the directory is missing or no candidate is present.
# Candidates are checked in the order given, matching the caller's
# preference ranking of expected KPI file names.
try_load_from_dir <- function(dir, candidates) {
  if (!dir.exists(dir)) {
    return(NULL)
  }
  paths <- file.path(dir, candidates)
  found <- paths[file.exists(paths)]
  if (length(found) > 0) found[[1]] else NULL
}
# Try primary directory first
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
if (is.null(summary_file) || is.null(field_details_file)) {
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
# List rds files under laravel_app/storage/app recursively
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
# Try to match by expected names
if (is.null(summary_file)) {
matched <- files[basename(files) %in% expected_summary_names]
if (length(matched) > 0) summary_file <- matched[1]
}
if (is.null(field_details_file)) {
matched2 <- files[basename(files) %in% expected_field_details_names]
if (length(matched2) > 0) field_details_file <- matched2[1]
}
}
# Final checks and load with safe error messages
kpi_files_exist <- FALSE
if (!is.null(summary_file) && file.exists(summary_file)) {
safe_log(paste("Loading KPI summary from:", summary_file))
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
} else {
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
}
if (!is.null(field_details_file) && file.exists(field_details_file)) {
safe_log(paste("Loading field details from:", field_details_file))
field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL })
if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE
} else {
safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
}
if (kpi_files_exist) {
safe_log("✓ KPI summary tables loaded successfully")
} else {
safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
}
# Set locale for consistent date formatting
Sys.setlocale("LC_TIME", "C")
# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)
# Calculate report dates and weeks
report_date_obj <- as.Date(today)
current_week <- as.numeric(format(report_date_obj, "%U"))
year <- as.numeric(format(report_date_obj, "%Y"))
# Calculate dates for weekly analysis
week_start <- report_date_obj - ((as.numeric(format(report_date_obj, "%w")) + 1) %% 7)
week_end <- week_start + 6
# Calculate week days (copied from 05 script for compatibility)
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Calculate initial week number
week <- lubridate::week(today) - 1
safe_log(paste("Initial week calculation:", week, "today:", today))
# Calculate previous dates for comparisons
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
# Adjust week calculation based on mail day
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
safe_log("Adjusting weeks because of mail day")
week <- lubridate::week(today) + 1
today_minus_1 <- as.character(lubridate::ymd(today))
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}
# Calculate week numbers for previous weeks
week_minus_1 <- week - 1
week_minus_2 <- week - 2
week_minus_3 <- week - 3
# Format current week with leading zeros
week <- sprintf("%02d", week)
safe_log(paste("Report week:", current_week, "Year:", year))
safe_log(paste("Week range:", week_start, "to", week_end))
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis")
date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables.rds"),
"kpi_summary_tables.rds",
paste0("kpi_summary_tables_", date_suffix, ".rds")
)
expected_field_details_names <- c(
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
paste0(project_dir, "_field_details.rds"),
"field_details.rds"
)
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
# Return the path of the first candidate filename that exists in `dir`,
# or NULL when the directory is absent or no candidate matches.
try_load_from_dir <- function(dir, candidates) {
# Bail out early when the directory itself does not exist.
if (!dir.exists(dir)) return(NULL)
for (name in candidates) {
f <- file.path(dir, name)
if (file.exists(f)) return(f)
}
# No candidate matched.
return(NULL)
}
# Try primary directory first
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
if (is.null(summary_file) || is.null(field_details_file)) {
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
# List rds files under laravel_app/storage/app recursively
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
# Try to match by expected names
if (is.null(summary_file)) {
matched <- files[basename(files) %in% expected_summary_names]
if (length(matched) > 0) summary_file <- matched[1]
}
if (is.null(field_details_file)) {
matched2 <- files[basename(files) %in% expected_field_details_names]
if (length(matched2) > 0) field_details_file <- matched2[1]
}
}
# Final checks and load with safe error messages
kpi_files_exist <- FALSE
if (!is.null(summary_file) && file.exists(summary_file)) {
safe_log(paste("Loading KPI summary from:", summary_file))
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
} else {
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
}
if (!is.null(field_details_file) && file.exists(field_details_file)) {
safe_log(paste("Loading field details from:", field_details_file))
field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL })
if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE
} else {
safe_log(paste("Field details file not found. Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING")
}
if (kpi_files_exist) {
safe_log("✓ KPI summary tables loaded successfully")
} else {
safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING")
}
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis")
kpi_data_dir
kpi_data_dir
## SIMPLE KPI LOADING - robust lookup with fallbacks
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path(here("laravel_app", "storage", "app", project_dir, "reports", "kpis"))
kpi_data_dir
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables.rds"),
"kpi_summary_tables.rds",
paste0("kpi_summary_tables_", date_suffix, ".rds")
)
expected_field_details_names <- c(
paste0(project_dir, "_field_details_", date_suffix, ".rds"),
paste0(project_dir, "_field_details.rds"),
"field_details.rds"
)
# Helper to attempt loading a file from the directory or fallback to a workspace-wide search
# Return the path of the first candidate filename that exists in `dir`,
# or NULL when the directory is absent or no candidate matches.
try_load_from_dir <- function(dir, candidates) {
# Bail out early when the directory itself does not exist.
if (!dir.exists(dir)) return(NULL)
for (name in candidates) {
f <- file.path(dir, name)
if (file.exists(f)) return(f)
}
# No candidate matched.
return(NULL)
}
# Try primary directory first
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
if (is.null(summary_file) || is.null(field_details_file)) {
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
# List rds files under laravel_app/storage/app recursively
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
# Try to match by expected names
if (is.null(summary_file)) {
matched <- files[basename(files) %in% expected_summary_names]
if (length(matched) > 0) summary_file <- matched[1]
}
if (is.null(field_details_file)) {
matched2 <- files[basename(files) %in% expected_field_details_names]
if (length(matched2) > 0) field_details_file <- matched2[1]
}
}
# Final checks and load with safe error messages
kpi_files_exist <- FALSE
if (!is.null(summary_file) && file.exists(summary_file)) {
safe_log(paste("Loading KPI summary from:", summary_file))
summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
} else {
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
}
summary_file
kpi_data_dir
# Scratch experiment: render two KPI summary tables side by side in a Word
# document using a continuous two-column section.
library(officer)
library(flextable)
# Data setup
# Hard-coded example tables mimicking the real KPI summary structure.
summary_tables <- list()
summary_tables$field_uniformity_summary <- data.frame(
"Uniformity Level" = c("Excellent", "Good", "Poor"),
"Count" = c(15, 8, 3),
"Percent" = c("62.5%", "33.3%", "12.5%")
)
summary_tables$weed_presence_summary <- data.frame(
"Weed Risk Level" = c("Low", "Moderate", "High"),
"Field Count" = c(18, 6, 2),
"Percent" = c("75.0%", "25.0%", "8.3%")
)
doc <- read_docx()
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
doc <- body_add_par(doc, "This section demonstrates just two KPI tables side by side.", style = "Normal")
# Open a continuous section with two 4.25" columns for the table pair.
# NOTE(review): `body_add_section()`/`columns()` may not exist in the
# installed officer version — the session later checks packageVersion().
doc <- body_add_section(doc, prop_section(
section_type = "continuous",
columns = columns(widths = c(4.25, 4.25))
))
doc <- body_add_flextable(doc, flextable(summary_tables$field_uniformity_summary) %>% set_caption("Field Uniformity Summary"))
# Column break moves the second table into the right-hand column.
doc <- body_add_break(doc, "column")
doc <- body_add_flextable(doc, flextable(summary_tables$weed_presence_summary) %>% set_caption("Weed Presence Score Summary"))
# Close the two-column layout by starting a single full-width section.
doc <- body_add_section(doc, prop_section(
section_type = "continuous",
columns = columns(widths = c(8.5))
))
doc <- body_add_par(doc, "This is a test report to verify the KPI grid layout.", style = "Normal")
# Writes the docx relative to the current working directory.
print(doc, target = "tables_side_by_side.docx")
here()
getwd()
print(doc, target = "tables_side_by_side.docx")
doc
print(doc, target = "tables_side_by_side.docx")
print(doc, target = "r_app/tables_side_by_side.docx")
library(officer)
library(flextable)
# Create example data
summary_tables <- list()
summary_tables$field_uniformity_summary <- data.frame(
"Uniformity Level" = c("Excellent", "Good", "Poor"),
"Count" = c(15, 8, 3),
"Percent" = c("62.5%", "33.3%", "12.5%")
)
summary_tables$weed_presence_summary <- data.frame(
"Weed Risk Level" = c("Low", "Moderate", "High"),
"Field Count" = c(18, 6, 2),
"Percent" = c("75.0%", "25.0%", "8.3%")
)
# Create document
doc <- read_docx()
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
doc <- body_add_par(doc, "This section demonstrates just two KPI tables side by side.", style = "Normal")
# Two-column section
doc <- body_add_section(doc, prop_section(
section_type = "continuous",
columns = columns(widths = c(4.25, 4.25))
))
library(officer)
library(flextable)
# Create example data
summary_tables <- list()
summary_tables$field_uniformity_summary <- data.frame(
"Uniformity Level" = c("Excellent", "Good", "Poor"),
"Count" = c(15, 8, 3),
"Percent" = c("62.5%", "33.3%", "12.5%")
)
summary_tables$weed_presence_summary <- data.frame(
"Weed Risk Level" = c("Low", "Moderate", "High"),
"Field Count" = c(18, 6, 2),
"Percent" = c("75.0%", "25.0%", "8.3%")
)
# Create document
doc <- read_docx()
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
doc <- body_add_par(doc, "This section demonstrates just two KPI tables side by side.", style = "Normal")
# Two-column section
doc <- body_add_section(doc, prop_section(
section_type = "continuous",
columns = columns(widths = c(4.25, 4.25))
))
packageVersion("officer")
??body_add_section
library(officer)
?body_add_section
library(officer)
library(flextable)
# Create example data
ft1 <- flextable(data.frame(
"Uniformity Level" = c("Excellent", "Good", "Poor"),
"Count" = c(15, 8, 3),
"Percent" = c("62.5%", "33.3%", "12.5%")
)) %>% set_caption("Field Uniformity Summary")
ft2 <- flextable(data.frame(
"Weed Risk Level" = c("Low", "Moderate", "High"),
"Field Count" = c(18, 6, 2),
"Percent" = c("75.0%", "25.0%", "8.3%")
)) %>% set_caption("Weed Presence Score Summary")
doc <- read_docx()
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
library(dplyr)
# Create example data
ft1 <- flextable(data.frame(
"Uniformity Level" = c("Excellent", "Good", "Poor"),
"Count" = c(15, 8, 3),
"Percent" = c("62.5%", "33.3%", "12.5%")
)) %>% set_caption("Field Uniformity Summary")
ft2 <- flextable(data.frame(
"Weed Risk Level" = c("Low", "Moderate", "High"),
"Field Count" = c(18, 6, 2),
"Percent" = c("75.0%", "25.0%", "8.3%")
)) %>% set_caption("Weed Presence Score Summary")
doc <- read_docx()
doc <- body_add_par(doc, "KPI Grid Test Report", style = "heading 1")
doc <- body_add_par(doc, "Executive Summary - Key Performance Indicators", style = "heading 2")
doc <- body_add_par(doc, "This section demonstrates two KPI tables side by side.", style = "Normal")
# Create a Word table (1 row, 2 columns)
doc <- body_add_table(doc, value = data.frame(A = "", B = ""), style = "Table Grid")
# Move cursor to first cell, insert first flextable
doc <- cursor_forward(doc)
doc <- slip_in_flextable(doc, ft1, pos = "on")
# Move cursor to second cell, insert second flextable
doc <- cursor_forward(doc)

View file

@ -1,36 +0,0 @@
#!/bin/bash
# Run planet_download for Kibos since September 2023 till today
# Usage: ./01_run_planet_download.sh --project_dir=kibos --date=2023-09-01 --days=<number_of_days>
# Default values, overridable via --key=value arguments below.
project_dir="kibos"
date="2023-09-01"
days=1
bbox=""
# Parse --key=value style command-line arguments.
for arg in "$@"; do
case "$arg" in
--days=*)
days="${arg#*=}"
;;
--date=*)
date="${arg#*=}"
;;
--project_dir=*)
project_dir="${arg#*=}"
;;
--bbox=*)
bbox="${arg#*=}"
;;
*)
echo "Unknown option: $arg"
exit 1
;;
esac
# NOTE(review): `shift` is redundant here — `for arg in "$@"` iterates a
# fixed expansion, so shifting the positional parameters has no effect.
shift
done
echo "Running planet_download for $project_dir from $date for $days days."
# Activate the bundled Python virtual environment relative to this script.
script_dir="$(dirname "$0")"
source "$script_dir/python_app/myenv/bin/activate"
# NOTE(review): the parsed values (days/date/project_dir/bbox) are never
# exported, so the executed notebook cannot see them — confirm intended.
jupyter nbconvert --execute --to script --stdout "$script_dir/python_app/planet_download.ipynb"
deactivate

View file

@ -1,31 +0,0 @@
#!/bin/bash
# Run ci_extraction.R
# Usage: ./02_run_ci_extraction.sh --end_date=<YYYY-MM-DD> --offset=<days> --project_dir=kibos
# Default values, overridable via --key=value arguments below.
end_date=$(date +'%Y-%m-%d')
offset=28
project_dir="kibos"
# Parse --key=value style command-line arguments.
for arg in "$@"; do
case $arg in
--end_date=*)
end_date="${arg#*=}"
;;
--offset=*)
offset="${arg#*=}"
;;
--project_dir=*)
project_dir="${arg#*=}"
;;
*)
echo "Unknown option: $arg"
exit 1
;;
esac
# NOTE(review): `shift` has no effect on this loop (fixed "$@" expansion).
shift
done
echo "Running ci_extraction.R for $project_dir with end_date $end_date and offset $offset."
# Run the R script from inside r_app so its relative paths resolve.
cd r_app
# NOTE(review): arguments are passed unquoted; values containing spaces
# would word-split — confirm acceptable for these inputs.
Rscript 02_ci_extraction.R $end_date $offset $project_dir
cd ..

View file

@ -1,35 +0,0 @@
#!/bin/bash
# Run mosaic_creation.R
# Usage: ./04_run_mosaic_creation.sh --end_date=<YYYY-MM-DD> --offset=<days> --data_dir=kibos --file_name_tif=<filename>
# Default values, overridable via --key=value arguments below.
end_date="$(date +%Y-%m-%d)"
offset=7
data_dir="kibos"
file_name_tif="week_03_2024.tif"
# Parse --key=value style command-line arguments.
for arg in "$@"; do
case $arg in
--offset=*)
offset="${arg#*=}"
;;
--end_date=*)
end_date="${arg#*=}"
;;
--data_dir=*)
data_dir="${arg#*=}"
;;
--file_name_tif=*)
file_name_tif="${arg#*=}"
;;
*)
echo "Unknown option: $arg"
exit 1
;;
esac
# NOTE(review): `shift` has no effect on this loop (fixed "$@" expansion).
shift
done
echo "Running mosaic_creation.R for $data_dir with end_date $end_date, offset $offset, file $file_name_tif."
# Run the R script from inside r_app so its relative paths resolve.
cd r_app
# NOTE(review): arguments are passed unquoted; values containing spaces
# would word-split — confirm acceptable for these inputs.
Rscript 04_mosaic_creation.R $end_date $offset $data_dir $file_name_tif
cd ..

50
10_planet_download.sh Normal file
View file

@ -0,0 +1,50 @@
#!/bin/bash
# Download Planet imagery by executing the planet_download notebook inside
# the project's Python virtual environment.
# Usage: ./10_planet_download.sh [--date=YYYY-MM-DD] [--days=N] [--project_dir=NAME] [--bbox=BBOX]

# Defaults (overridable via --key=value arguments)
date=$(date +%Y-%m-%d)
days=1
project_dir="chemba"
bbox=""   # initialize so BBOX is exported empty rather than unset

# Parse --key=value style command-line arguments.
# (No `shift` needed: `for arg in "$@"` iterates a fixed expansion.)
for arg in "$@"; do
  case "$arg" in
    --days=*)
      days="${arg#*=}"
      ;;
    --date=*)
      date="${arg#*=}"
      ;;
    --project_dir=*)
      project_dir="${arg#*=}"
      ;;
    --bbox=*)
      bbox="${arg#*=}"
      ;;
    *)
      echo "Onbekende optie: $arg"
      exit 1
      ;;
  esac
done

# Echo the effective configuration.
echo "Datum: $date"
echo "Aantal dagen: $days"
echo "Project directory: $project_dir"
echo "BBOX: $bbox"

# Activate the virtual environment relative to this script's location.
script_dir="$(dirname "$0")"
source "$script_dir/python_app/myenv/bin/activate"
echo "$script_dir/python_app/planet_download.ipynb"

# Expose the parameters to the notebook via environment variables.
export DAYS="$days"
export DATE="$date"
export PROJECT_DIR="$project_dir"
export BBOX="$bbox"

# Execute the notebook in place.
jupyter nbconvert --execute --to script --stdout "$script_dir/python_app/planet_download.ipynb" #needs to be calling 00_download_8band_pu_optimized.py instead of the notebook directly
# Deactivate the virtual environment (optional).
deactivate

37
20_ci_extraction.sh Normal file
View file

@ -0,0 +1,37 @@
#!/bin/bash
# Run the CI extraction R script (20_ci_extraction.R) for a project.
# Usage: ./20_ci_extraction.sh [--end_date=YYYY-MM-DD] [--offset=DAYS] [--project_dir=NAME]
# Fix: added the missing shebang; quoted Rscript arguments; guarded the cd.

# Defaults (overridable via --key=value arguments)
end_date=$(date +'%Y-%m-%d')
offset=28
project_dir="Bagamoyo_trial"

# Parse command line arguments
# (No `shift` needed: `for arg in "$@"` iterates a fixed expansion.)
for arg in "$@"; do
  case $arg in
    --end_date=*)
      end_date="${arg#*=}"
      ;;
    --offset=*)
      offset="${arg#*=}"
      ;;
    --project_dir=*)
      project_dir="${arg#*=}"
      ;;
    *)
      echo "Unknown option: $arg"
      exit 1
      ;;
  esac
done

echo "end_date: $end_date"
echo "offset: $offset"

# Check if required arguments are set (guards against empty overrides, e.g. --offset=)
if [ -z "$end_date" ] || [ -z "$project_dir" ] || [ -z "$offset" ]; then
  echo "Missing arguments. Use: ci_extraction.sh --end_date=2024-01-01 --offset=28 --project_dir=Bagamoyo_trial"
  exit 1
fi

echo ci_extraction.R $end_date $offset $project_dir
# Fail fast if the R app directory is missing instead of running in the wrong dir.
cd ../r_app || exit 1
Rscript 20_ci_extraction.R "$end_date" "$offset" "$project_dir"

View file

@ -17,6 +17,6 @@ for arg in "$@"; do
done done
echo "Running interpolate_growth_model.R for $project_dir." echo "Running interpolate_growth_model.R for $project_dir."
cd r_app cd ../r_app
Rscript 03_interpolate_growth_model.R $project_dir Rscript 30_interpolate_growth_model.R $project_dir
cd .. cd ..

43
40_mosaic_creation.sh Normal file
View file

@ -0,0 +1,43 @@
#!/bin/bash
# Build the weekly mosaic by running 40_mosaic_creation.R.
# Usage: ./40_mosaic_creation.sh [--end_date=YYYY-MM-DD] [--offset=DAYS] [--data_dir=NAME] [--file_name_tif=FILE]
# Fix: corrected the usage message typo (--endate -> --end_date); quoted
# Rscript arguments; guarded the cd.

# Defaults (overridable via --key=value arguments)
end_date="2024-06-08"
offset=7
data_dir="chemba"
file_name_tif="week_03_2024.tif"

# Parse command line arguments
# (No `shift` needed: `for arg in "$@"` iterates a fixed expansion.)
for arg in "$@"; do
  case $arg in
    --offset=*)
      offset="${arg#*=}"
      ;;
    --end_date=*)
      end_date="${arg#*=}"
      ;;
    --data_dir=*)
      data_dir="${arg#*=}"
      ;;
    --file_name_tif=*)
      file_name_tif="${arg#*=}"
      ;;
    *)
      echo "Unknown option: $arg"
      exit 1
      ;;
  esac
done

echo "offset: $offset"
echo "end_date: $end_date"

# Check if required arguments are set (guards against empty overrides, e.g. --offset=)
if [ -z "$end_date" ] || [ -z "$data_dir" ] || [ -z "$offset" ] || [ -z "$file_name_tif" ]; then
  echo "Missing arguments. Use: 40_mosaic_creation.sh --end_date=2024-01-01 --offset=7 --data_dir=chemba --file_name_tif=week_03_2024.tif"
  exit 1
fi

echo 40_mosaic_creation.R $end_date $offset $data_dir $file_name_tif
# Fail fast if the R app directory is missing instead of running in the wrong dir.
cd ../r_app || exit 1
Rscript 40_mosaic_creation.R "$end_date" "$offset" "$data_dir" "$file_name_tif"

View file

@ -7,10 +7,18 @@
# and ensures proper R execution with renv environment and error handling. # and ensures proper R execution with renv environment and error handling.
# Script configuration # Script configuration
SCRIPT_NAME="09_run_calculate_kpis.sh" SCRIPT_NAME="80_calculate_kpis.sh"
R_SCRIPT_NAME="09_calculate_kpis.R" R_SCRIPT_NAME="80_calculate_kpis.R"
LOG_PREFIX="[KPI_CALC]" LOG_PREFIX="[KPI_CALC]"
project_dir="tz11_mbigiri_john_trial"
offset=7
end_date=$(date +"%Y-%m-%d")
# Function to log messages with timestamp # Function to log messages with timestamp
log_message() { log_message() {
echo "$(date '+%Y-%m-%d %H:%M:%S') $LOG_PREFIX $1" echo "$(date '+%Y-%m-%d %H:%M:%S') $LOG_PREFIX $1"
@ -40,76 +48,60 @@ check_directory() {
# Main execution function # Main execution function
main() { main() {
R_CMD ="Rscript"
log_message "Starting KPI calculation pipeline step" log_message "Starting KPI calculation pipeline step"
# Check if we're in the correct directory # Check if we're in the correct directory
if [ ! -f "r_app/$R_SCRIPT_NAME" ]; then if [ ! -f "../r_app/$R_SCRIPT_NAME" ]; then
handle_error "Must be run from smartcane root directory (where r_app/ folder exists)" handle_error "Must be run from lavevel_app directory (where ../r_app/ folder exists)"
fi
# Check for R installation
if ! command -v R &> /dev/null; then
# Try Windows R installation path
R_CMD="C:/Program Files/R/R-4.4.3/bin/x64/R.exe"
if [ ! -f "$R_CMD" ]; then
handle_error "R not found in PATH or at expected Windows location"
fi
else
R_CMD="R"
fi fi
log_message "Using R at: $R_CMD" log_message "Using R at: $R_CMD"
# Set default project directory if not provided log_message "Using project directory: $project_dir"
if [ -z "$1" ]; then
PROJECT_DIR="esa"
log_message "No project directory specified, using default: $PROJECT_DIR"
else
PROJECT_DIR="$1"
log_message "Using project directory: $PROJECT_DIR"
fi
# Check if project directory exists # Check if project directory exists
PROJECT_PATH="laravel_app/storage/app/$PROJECT_DIR" project_path="../laravel_app/storage/app/$project_dir"
check_directory "$PROJECT_PATH" || handle_error "Project directory not found: $PROJECT_PATH" check_directory "$project_path" || handle_error "Project directory not found: $project_path"
# Check for required data files # Check for required data files
check_file "$PROJECT_PATH/Data/pivot.geojson" check_file "$project_path/Data/pivot.geojson"
# Check for weekly mosaic directory # Check for weekly mosaic directory
MOSAIC_DIR="$PROJECT_PATH/weekly_mosaic" mosaic_dir="$project_path/weekly_mosaic"
check_directory "$MOSAIC_DIR" || handle_error "Weekly mosaic directory not found: $MOSAIC_DIR" check_directory "$mosaic_dir" || handle_error "Weekly mosaic directory not found: $mosaic_dir"
# Count available mosaics # Count available mosaics
MOSAIC_COUNT=$(find "$MOSAIC_DIR" -name "week_*.tif" 2>/dev/null | wc -l) mosaic_count=$(find "$mosaic_dir" -name "week_*.tif" 2>/dev/null | wc -l)
if [ "$MOSAIC_COUNT" -lt 1 ]; then if [ "$mosaic_count" -lt 1 ]; then
handle_error "No weekly mosaics found in $MOSAIC_DIR" handle_error "No weekly mosaics found in $mosaic_dir"
fi fi
log_message "Found $MOSAIC_COUNT weekly mosaics in $MOSAIC_DIR" log_message "Found $mosaic_count weekly mosaics in $mosaic_dir"
# Create temporary R script with project configuration # Create temporary R script with project configuration
TEMP_R_SCRIPT="temp_kpi_calc_$$.R" temp_r_script="temp_kpi_calc_$$.R"
cat > "r_app/$TEMP_R_SCRIPT" << EOF cat > "../r_app/$temp_r_script" << EOF
# Temporary KPI calculation script # Temporary KPI calculation script
# Generated by $SCRIPT_NAME on $(date) # Generated by $SCRIPT_NAME on $(date)
# Set project directory # Set project directory
project_dir <- "$PROJECT_DIR" # project_dir <- "$PROJECT_DIR"
# Set working directory to r_app # Set working directory to r_app
setwd("r_app") #setwd("r_app")
# Source the main KPI calculation script # Source the main KPI calculation script
tryCatch({ tryCatch({
source("$R_SCRIPT_NAME") source("$R_SCRIPT_NAME")
cat("✓ KPI calculation completed successfully\\n") cat("✓ KPI calculation completed successfully!!n")
}, error = function(e) { }, error = function(e) {
cat("✗ Error in KPI calculation:", e\$message, "\\n") cat("✗ Error in KPI calculation:", e\$message, "\\n")
quit(status = 1) quit(status = 1)
}) })
EOF EOF
log_message "Created temporary R script: r_app/$TEMP_R_SCRIPT" log_message "Created temporary R script: r_app/$temp_r_script"
# Execute R script # Execute R script
log_message "Starting R execution..." log_message "Starting R execution..."
@ -124,12 +116,17 @@ EOF
R_EXIT_CODE=$? R_EXIT_CODE=$?
else else
# Unix/Linux execution # Unix/Linux execution
"$R_CMD" --vanilla < "r_app/$TEMP_R_SCRIPT" cd r_app
log_message "calling $R_CMD $temp_r_script "
Rscript "$temp_r_script" "$end_date" "$offset" "$project_dir"
R_EXIT_CODE=$? R_EXIT_CODE=$?
fi fi
# Clean up temporary script # Clean up temporary script
rm -f "r_app/$TEMP_R_SCRIPT" rm -f "../r_app/$temp_r_script"
log_message "Cleaned up temporary R script" log_message "Cleaned up temporary R script"
# Check R execution result # Check R execution result
@ -137,7 +134,7 @@ EOF
log_message "✓ KPI calculation completed successfully" log_message "✓ KPI calculation completed successfully"
# Check if output files were created # Check if output files were created
REPORTS_DIR="laravel_app/storage/app/$PROJECT_DIR/reports" REPORTS_DIR="../laravel_app/storage/app/$project_dir/reports"
if check_directory "$REPORTS_DIR/kpis"; then if check_directory "$REPORTS_DIR/kpis"; then
KPI_FILES=$(find "$REPORTS_DIR/kpis" -name "*$(date '+%Y%m%d')*" 2>/dev/null | wc -l) KPI_FILES=$(find "$REPORTS_DIR/kpis" -name "*$(date '+%Y%m%d')*" 2>/dev/null | wc -l)
if [ "$KPI_FILES" -gt 0 ]; then if [ "$KPI_FILES" -gt 0 ]; then
@ -156,18 +153,21 @@ EOF
# Script usage information # Script usage information
usage() { usage() {
echo "Usage: $0 [PROJECT_DIR]" echo "Usage: $0 --project_dir=[PROJECT_DIR] --offset=[number] --end-date=[date]"
echo "" echo ""
echo "Calculate KPI metrics for SmartCane monitoring system" echo "Calculate KPI metrics for SmartCane monitoring system"
echo "" echo ""
echo "Parameters:" echo "Parameters:"
echo " PROJECT_DIR Project directory name (default: esa)" echo " --project_dir Project directory name (default: esa)"
echo " Must exist in laravel_app/storage/app/" echo " Must exist in laravel_app/storage/app/"
echo ""
echo " --offset (default: 7)"
echo ""
echo " --end-date (default: $(date +%Y-%m-%d))"
echo "" echo ""
echo "Examples:" echo "Examples:"
echo " $0 # Use default 'esa' project" echo " $0 # Use default 'esa' project"
echo " $0 aura # Use 'aura' project" echo " $0 --project_dir=aura --offset=7 # Use 'aura' project with offset 7"
echo " $0 chemba # Use 'chemba' project"
echo "" echo ""
echo "Requirements:" echo "Requirements:"
echo " - R installation (4.4.3 or compatible)" echo " - R installation (4.4.3 or compatible)"
@ -176,13 +176,41 @@ usage() {
echo " - Field boundaries in PROJECT_DIR/Data/pivot.geojson" echo " - Field boundaries in PROJECT_DIR/Data/pivot.geojson"
} }
# Handle command line arguments ## Parse command line arguments
case "${1:-}" in for arg in "$@"; do
-h|--help) case $arg in
usage -h|--help)
exit 0 usage
;; exit 0
*) ;;
main "$@" --offset=*)
;; offset="${arg#*=}"
esac ;;
--end_date=*)
end_date="${arg#*=}"
;;
--project_dir=*)
project_dir="${arg#*=}"
;;
*)
echo "Unknown option: $arg"
exit 1
;;
esac
shift
done
# ----------------------------------------------------------------
# Validate required arguments bit dumb because all have defaults;
# ----------------------------------------------------------------
if [[ -z "$project_dir" || -z "$offset" || -z "$end_date" ]]; then
echo "❌ Missing required arguments." >&2
usage
exit 1
fi
# -------------------------------------
# Run main
# -------------------------------------
main

View file

@ -51,6 +51,6 @@ done
echo "Running CI report with KPIs for $data_dir, report date $report_date, mail day $mail_day." echo "Running CI report with KPIs for $data_dir, report date $report_date, mail day $mail_day."
echo "Parameters: borders=$borders, ci_plot_type=$ci_plot_type, colorblind=$colorblind_friendly, facet_by_season=$facet_by_season, x_axis_unit=$x_axis_unit" echo "Parameters: borders=$borders, ci_plot_type=$ci_plot_type, colorblind=$colorblind_friendly, facet_by_season=$facet_by_season, x_axis_unit=$x_axis_unit"
cd r_app cd ../r_app
Rscript -e "rmarkdown::render('10_CI_report_with_kpis_simple.Rmd', output_file='$filename', params=list(report_date='$report_date', mail_day='$mail_day', data_dir='$data_dir', borders='$borders', ci_plot_type='$ci_plot_type', colorblind_friendly='$colorblind_friendly', facet_by_season='$facet_by_season', x_axis_unit='$x_axis_unit'))" Rscript -e "rmarkdown::render('90_CI_report_with_kpis_simple.Rmd', output_file='$filename', params=list(report_date='$report_date', mail_day='$mail_day', data_dir='$data_dir', borders='$borders', ci_plot_type='$ci_plot_type', colorblind_friendly='$colorblind_friendly', facet_by_season='$facet_by_season', x_axis_unit='$x_axis_unit'))"
cd .. cd ..

View file

@ -1,180 +0,0 @@
# Analyze timing between CI threshold crossings and actual harvest dates
# Goal: Determine how soon after CI drops below threshold the harvest actually occurs

suppressPackageStartupMessages({
  library(readxl)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(here)
  library(ggplot2)
})

# Set project directory (assigned globally so parameters_project.R can read it)
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))

# Read daily CI data (fitted CI value per field per day)
ci_rds_file <- here("laravel_app/storage/app", project_dir,
                    "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()

time_series_daily <- ci_data_raw %>%
  mutate(date = as.Date(Date)) %>%
  select(field_id = field, date, ci = FitData) %>%
  arrange(field_id, date)

# Read actual harvest data. Build the path from project_dir via here() for
# consistency with ci_rds_file above (the original hard-coded the relative
# "esa" path here, contradicting the project_dir setting).
harvest_actual <- read_excel(here("laravel_app/storage/app", project_dir,
                                  "Data/harvest.xlsx")) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  filter(!is.na(season_end))
cat("=== ANALYZING CI THRESHOLD CROSSING TIMING ===\n\n")

# For each actual harvest, find when CI first dropped below various thresholds
thresholds <- c(3.0, 2.5, 2.0, 1.8)

results <- list()

for (i in seq_len(nrow(harvest_actual))) {
  harvest <- harvest_actual[i, ]
  field <- harvest$field
  harvest_date <- harvest$season_end

  # Get CI data for this field in the year before harvest
  field_data <- time_series_daily %>%
    filter(field_id == field,
           date >= (harvest_date - 365),
           date <= harvest_date) %>%
    arrange(date)

  if (nrow(field_data) == 0) next

  # For each threshold, find LAST crossing date (working backward from harvest).
  # This finds the mature->harvest transition, not the previous cycle's harvest.
  # vapply() (instead of sapply()) guarantees a character result even when
  # every threshold yields NA.
  threshold_crossings <- vapply(thresholds, function(threshold) {
    # Find the LAST day the field was clearly mature (CI > 3.5),
    # scanning backward from the harvest date.
    last_mature_idx <- NA_integer_
    for (j in rev(seq_len(nrow(field_data)))) {
      if (!is.na(field_data$ci[j]) && field_data$ci[j] > 3.5) {
        last_mature_idx <- j
        break
      }
    }
    # If no mature period found, skip
    if (is.na(last_mature_idx)) return(NA_character_)

    # Now find first crossing below threshold AFTER the mature period,
    # confirmed by three consecutive days below the threshold. The range is
    # guarded so the loop never runs backward: the original
    # `last_mature_idx:(nrow(field_data) - 2)` descended when the mature index
    # was within 2 rows of the series end, indexing past the data (and hitting
    # zero-length `&&` errors on R >= 4.3 for very short series).
    last_checkable <- nrow(field_data) - 2
    if (last_mature_idx <= last_checkable) {
      for (j in last_mature_idx:last_checkable) {
        if (!is.na(field_data$ci[j]) && !is.na(field_data$ci[j + 1]) && !is.na(field_data$ci[j + 2]) &&
            field_data$ci[j] < threshold &&
            field_data$ci[j + 1] < threshold &&
            field_data$ci[j + 2] < threshold) {
          return(as.character(field_data$date[j]))
        }
      }
    }
    NA_character_
  }, character(1))

  result_row <- data.frame(
    field = field,
    harvest_date = harvest_date,
    ci_at_harvest = field_data$ci[nrow(field_data)]
  )

  # Attach crossing date and lead time (days before harvest) per threshold.
  for (k in seq_along(thresholds)) {
    threshold <- thresholds[k]
    crossing_date <- as.Date(threshold_crossings[k])
    if (!is.na(crossing_date)) {
      days_before_harvest <- as.numeric(harvest_date - crossing_date)
      result_row[[paste0("first_below_", threshold)]] <- as.character(crossing_date)
      result_row[[paste0("days_before_", threshold)]] <- days_before_harvest
    } else {
      result_row[[paste0("first_below_", threshold)]] <- NA
      result_row[[paste0("days_before_", threshold)]] <- NA
    }
  }

  results[[i]] <- result_row
}

timing_analysis <- bind_rows(results)
# Print summary statistics
# For each threshold, summarise how many days before the actual harvest the
# CI series first stayed below that threshold (the days_before_* columns
# were built in the detection loop above).
cat("\n=== TIMING STATISTICS: Days from threshold crossing to actual harvest ===\n\n")
for (threshold in thresholds) {
  days_col <- paste0("days_before_", threshold)
  days_before <- timing_analysis[[days_col]]
  # Keep only the harvests where a crossing was actually detected
  days_before <- days_before[!is.na(days_before)]
  if (length(days_before) > 0) {
    cat(sprintf("CI < %.1f threshold:\n", threshold))
    cat(sprintf(" Valid cases: %d/%d (%.1f%%)\n",
    length(days_before), nrow(timing_analysis),
    100 * length(days_before) / nrow(timing_analysis)))
    cat(sprintf(" Mean: %.1f days before harvest\n", mean(days_before)))
    cat(sprintf(" Median: %.1f days before harvest\n", median(days_before)))
    cat(sprintf(" Range: %.1f to %.1f days\n", min(days_before), max(days_before)))
    cat(sprintf(" Q1-Q3: %.1f to %.1f days\n", quantile(days_before, 0.25), quantile(days_before, 0.75)))
    # Count how many harvests occur within specific time windows after crossing
    # (negative values — crossings after harvest — are excluded by the >= 0 term)
    within_7d <- sum(days_before >= 0 & days_before <= 7)
    within_14d <- sum(days_before >= 0 & days_before <= 14)
    within_21d <- sum(days_before >= 0 & days_before <= 21)
    within_30d <- sum(days_before >= 0 & days_before <= 30)
    cat(sprintf(" Harvest timing after crossing:\n"))
    cat(sprintf(" 0-7 days: %d (%.1f%%)\n", within_7d, 100*within_7d/length(days_before)))
    cat(sprintf(" 0-14 days: %d (%.1f%%)\n", within_14d, 100*within_14d/length(days_before)))
    cat(sprintf(" 0-21 days: %d (%.1f%%)\n", within_21d, 100*within_21d/length(days_before)))
    cat(sprintf(" 0-30 days: %d (%.1f%%)\n", within_30d, 100*within_30d/length(days_before)))
    cat("\n")
  } else {
    cat(sprintf("CI < %.1f threshold: No valid crossings found\n\n", threshold))
  }
}
# Show detailed table for fields with mismatches
cat("\n=== DETAILED TIMING BY FIELD ===\n")

# Pick the per-threshold "days before harvest" columns dynamically and show at
# most the first two. head() is safe when no such columns exist — the original
# `days_cols[1:min(2, length(days_cols))]` indexed with 1:0 (i.e. c(1, 0)) in
# that case and produced an NA column name.
days_cols <- grep("days_before_", names(timing_analysis), value = TRUE)
select_cols <- c("field", "harvest_date", "ci_at_harvest", head(days_cols, 2))
print(timing_analysis %>%
  select(all_of(select_cols)) %>%
  arrange(field, harvest_date), n = 100)

# Create visualization: distribution of lead times per threshold
cat("\n=== Creating timing distribution plot ===\n")
timing_long <- timing_analysis %>%
  select(field, harvest_date, starts_with("days_before_")) %>%
  pivot_longer(cols = starts_with("days_before_"),
               names_to = "threshold",
               values_to = "days_before") %>%
  filter(!is.na(days_before)) %>%
  mutate(threshold = gsub("days_before_", "CI < ", threshold))

png("timing_threshold_to_harvest.png", width = 1200, height = 800, res = 120)
ggplot(timing_long, aes(x = days_before, fill = threshold)) +
  geom_histogram(binwidth = 7, alpha = 0.7, position = "identity") +
  facet_wrap(~threshold, ncol = 1) +
  geom_vline(xintercept = c(7, 14, 21), linetype = "dashed", color = "red", alpha = 0.5) +
  labs(
    title = "Time from CI Threshold Crossing to Actual Harvest",
    subtitle = "How many days AFTER CI drops below threshold does harvest actually occur?",
    x = "Days from threshold crossing to harvest",
    y = "Count of harvest events",
    caption = "Dashed lines at 7, 14, 21 days"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
dev.off()
cat("\nPlot saved to: timing_threshold_to_harvest.png\n")
View file

@ -1,197 +0,0 @@
# Analyze CI drop patterns to distinguish harvest from anomalies
# Goal: Identify characteristics of true harvest drops vs single-day noise
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(here)
library(ggplot2)
})
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))
# Read daily CI data
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
time_series_daily <- ci_data_raw %>%
mutate(date = as.Date(Date)) %>%
select(field_id = field, date, ci = FitData) %>%
arrange(field_id, date) %>%
group_by(field_id) %>%
mutate(
# Calculate changes
ci_lag1 = lag(ci, 1),
ci_lag2 = lag(ci, 2),
ci_lead1 = lead(ci, 1),
ci_lead2 = lead(ci, 2),
ci_lead3 = lead(ci, 3),
# Drop magnitude
drop_1day = ci_lag1 - ci,
drop_2day = ci_lag2 - ci,
# Recovery after drop
recovery_1day = ci_lead1 - ci,
recovery_2day = ci_lead2 - ci,
recovery_3day = ci_lead3 - ci,
# Is this a single-day anomaly?
is_spike_drop = (ci < 2.0 & ci_lag1 > 3.0 & ci_lead1 > 3.0)
) %>%
ungroup()
# Read actual harvest data
harvest_actual <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(!is.na(season_end))
cat("=== ANALYZING CI DROP PATTERNS ===\n\n")
# Find all instances where CI drops below 2.0
all_drops <- time_series_daily %>%
filter(ci < 2.0, ci_lag1 > 2.0) %>% # First day below 2.0
select(field_id, date, ci, ci_lag1, drop_1day,
ci_lead1, ci_lead2, ci_lead3,
recovery_1day, recovery_2day, recovery_3day)
# Classify drops based on what happens next
drops_classified <- all_drops %>%
mutate(
drop_type = case_when(
# Spike: drops but recovers to >3.0 within 3 days
!is.na(ci_lead1) & ci_lead1 > 3.0 ~ "SPIKE (1-day anomaly)",
!is.na(ci_lead2) & ci_lead2 > 3.0 ~ "SPIKE (2-day anomaly)",
!is.na(ci_lead3) & ci_lead3 > 3.0 ~ "SPIKE (3-day anomaly)",
# Sustained: stays below 2.5 for at least 3 days
!is.na(ci_lead1) & !is.na(ci_lead2) & !is.na(ci_lead3) &
ci_lead1 < 2.5 & ci_lead2 < 2.5 & ci_lead3 < 2.5 ~ "SUSTAINED (likely harvest)",
TRUE ~ "UNCLEAR (insufficient data)"
),
sharp_drop = drop_1day > 1.0 # Drop >1 CI point
)
cat("=== DROP TYPE DISTRIBUTION ===\n")
drop_summary <- drops_classified %>%
count(drop_type) %>%
mutate(percent = 100 * n / sum(n)) %>%
arrange(desc(n))
print(drop_summary)
cat("\n=== SHARP DROPS (>1.0 CI point) ===\n")
sharp_summary <- drops_classified %>%
filter(sharp_drop) %>%
count(drop_type) %>%
mutate(percent = 100 * n / sum(n))
print(sharp_summary)
# Match drops to actual harvests
cat("\n=== MATCHING DROPS TO ACTUAL HARVESTS ===\n")
drops_with_harvest <- drops_classified %>%
left_join(
harvest_actual %>%
select(field, actual_harvest_date = season_end),
by = c("field_id" = "field")
) %>%
filter(!is.na(actual_harvest_date)) %>%
mutate(
days_from_harvest = as.numeric(date - actual_harvest_date),
near_harvest = abs(days_from_harvest) <= 14,
timing_category = case_when(
days_from_harvest >= -7 & days_from_harvest <= 7 ~ "Within 1 week of harvest",
days_from_harvest >= -14 & days_from_harvest <= 14 ~ "Within 2 weeks of harvest",
days_from_harvest >= -21 & days_from_harvest <= 21 ~ "Within 3 weeks of harvest",
TRUE ~ "Far from harvest (>3 weeks)"
)
)
cat("\n=== DROP TYPES BY PROXIMITY TO ACTUAL HARVEST ===\n")
harvest_proximity_summary <- drops_with_harvest %>%
count(drop_type, timing_category) %>%
pivot_wider(names_from = timing_category, values_from = n, values_fill = 0)
print(harvest_proximity_summary)
# Key insight: What % of SUSTAINED drops are near harvest vs SPIKE drops?
cat("\n=== KEY INSIGHT: Are sustained drops near harvest? ===\n")
sustained_near_harvest <- drops_with_harvest %>%
filter(grepl("SUSTAINED", drop_type)) %>%
summarise(
total = n(),
near_harvest = sum(near_harvest),
percent_near = 100 * near_harvest / total
)
spike_near_harvest <- drops_with_harvest %>%
filter(grepl("SPIKE", drop_type)) %>%
summarise(
total = n(),
near_harvest = sum(near_harvest),
percent_near = 100 * near_harvest / total
)
cat("\nSUSTAINED drops (CI stays low):\n")
cat(sprintf(" Total: %d\n", sustained_near_harvest$total))
cat(sprintf(" Near harvest (±14d): %d (%.1f%%)\n",
sustained_near_harvest$near_harvest,
sustained_near_harvest$percent_near))
cat("\nSPIKE drops (CI recovers quickly):\n")
cat(sprintf(" Total: %d\n", spike_near_harvest$total))
cat(sprintf(" Near harvest (±14d): %d (%.1f%%)\n",
spike_near_harvest$near_harvest,
spike_near_harvest$percent_near))
# Analyze recovery patterns
cat("\n=== RECOVERY PATTERNS (how fast does CI bounce back?) ===\n")
recovery_stats <- drops_classified %>%
filter(!is.na(recovery_3day)) %>%
group_by(drop_type) %>%
summarise(
count = n(),
mean_recovery_1d = mean(recovery_1day, na.rm = TRUE),
mean_recovery_2d = mean(recovery_2day, na.rm = TRUE),
mean_recovery_3d = mean(recovery_3day, na.rm = TRUE),
median_recovery_1d = median(recovery_1day, na.rm = TRUE),
median_recovery_2d = median(recovery_2day, na.rm = TRUE),
median_recovery_3d = median(recovery_3day, na.rm = TRUE)
)
print(recovery_stats)
# Show examples of each type
cat("\n=== EXAMPLES: SPIKE (false alarm) ===\n")
print(drops_classified %>%
filter(drop_type == "SPIKE (1-day anomaly)") %>%
select(field_id, date, ci_lag1, ci, ci_lead1, drop_1day, recovery_1day) %>%
head(10), n = 10)
cat("\n=== EXAMPLES: SUSTAINED (likely harvest) ===\n")
print(drops_classified %>%
filter(drop_type == "SUSTAINED (likely harvest)") %>%
select(field_id, date, ci_lag1, ci, ci_lead1, ci_lead2, ci_lead3, drop_1day) %>%
head(10), n = 10)
# Recommendation
cat("\n=== RECOMMENDATION ===\n")
cat("To avoid false alarms from single-day spikes:\n")
cat("1. Require CI to stay below 2.0 for at least 3 consecutive days\n")
cat("2. Check that CI doesn't recover above 3.0 within next 3 days\n")
cat("3. Sharp drops (>1.0 CI) that sustain are strong harvest signals\n")
cat("4. Trade-off: Waiting 3 days for confirmation delays alert by 3 days\n")
cat(" - But eliminates false positives from cloud noise\n")
cat(" - Harvest still detected 4-11 days before actual event (median 7d)\n")

View file

@ -1,136 +0,0 @@
# R script to analyze image dates and missing weeks
library(dplyr)
library(lubridate)
library(ggplot2)

# Set folder path
folder <- "laravel_app/storage/app/esa/merged_final_tif"
files <- list.files(folder, pattern = "\\.tif$", full.names = FALSE)

# Extract dates and file sizes. Filenames are assumed to parse as dates once
# the ".tif" suffix is stripped (regex dot escaped — the original ".tif$"
# treated the dot as a wildcard).
# NOTE: the original script had a stray `df <- data.frame(date = dates)` here,
# referencing `dates` before it was defined; that line always errored and has
# been removed.
dates <- as.Date(sub("\\.tif$", "", files))
sizes_kb <- file.info(file.path(folder, files))$size / 1024

df <- data.frame(date = dates, size_kb = sizes_kb, file = files) %>%
  mutate(year = year(date),
         week = isoweek(date),
         # Mosaics under ~9 MB are treated as partial coverage
         completeness = ifelse(size_kb >= 9000, "Complete", "Incomplete"))
# Get all years in data
years <- sort(unique(df$year))

# Prepare output table (one summary row per year, filled in the loop below)
output <- data.frame(
  year = integer(),
  n_images = integer(),
  n_weeks_missing = integer(),
  max_consec_weeks_missing = integer(),
  avg_images_per_week = numeric(),
  stringsAsFactors = FALSE
)
missing_weeks_list <- list()
current_year <- as.integer(format(Sys.Date(), "%Y"))

# For plotting: build a data frame with all year/week combinations and count images per week
# For plotting: count complete/incomplete images per week/year
# (every year x week 1..52 x completeness combination, including empty weeks)
plot_weeks <- expand.grid(year = years, week = 1:52, completeness = c("Complete", "Incomplete"))
plot_weeks$n_images <- 0
for (i in seq_len(nrow(plot_weeks))) {
  y <- plot_weeks$year[i]
  w <- plot_weeks$week[i]
  ctype <- plot_weeks$completeness[i]
  # Count the images that fall in this year/week/completeness cell
  plot_weeks$n_images[i] <- sum(df$year == y & df$week == w & df$completeness == ctype)
}

# Plot: X = week, Y = number of images, fill = completeness, color = year (stacked bar chart)
gg <- ggplot(plot_weeks, aes(x = week, y = n_images, fill = completeness)) +
  geom_col(position = "stack") +
  facet_wrap(~ year, ncol = 1) +
  scale_x_continuous(breaks = 1:52) +
  scale_y_continuous(breaks = 0:max(plot_weeks$n_images)) +
  labs(x = "Week number", y = "Number of images", fill = "Completeness",
       title = "Complete vs Incomplete Images per Week (by Year)") +
  theme_minimal()
ggsave("images_per_week_by_year_stacked.png", gg, width = 12, height = 10)
cat("Plot saved as images_per_week_by_year_stacked.png\n")

# ISO week of today; used below so future weeks of the current year are not
# flagged as missing
current_week <- isoweek(Sys.Date())
for (y in years) {
  # For current year, only consider weeks up to today; for past years, all 1:52
  if (y == current_year) {
    all_weeks <- 1:current_week
  } else {
    all_weeks <- 1:52
  }
  weeks_with_images <- unique(df$week[df$year == y])
  weeks_missing <- setdiff(all_weeks, weeks_with_images)
  n_weeks_missing <- length(weeks_missing)
  n_images <- sum(df$year == y)
  # Average over only the weeks that actually have imagery; the guards avoid
  # dividing by zero when every considered week is missing
  if ((y == current_year) && (current_week - n_weeks_missing > 0)) {
    avg_images_per_week <- n_images / (current_week - n_weeks_missing)
  } else if (y != current_year && (52 - n_weeks_missing > 0)) {
    avg_images_per_week <- n_images / (52 - n_weeks_missing)
  } else {
    avg_images_per_week <- NA
  }
  # Find longest run of consecutive missing weeks
  if (n_weeks_missing == 0) {
    max_consec <- 0
  } else {
    w <- sort(weeks_missing)
    # c(1, diff(w)) == 1 is TRUE where a missing week directly follows the
    # previous missing week (and for the first element); the longest TRUE run
    # is therefore the longest consecutive gap
    runs <- rle(c(1, diff(w)) == 1)
    max_consec <- max(runs$lengths[runs$values], na.rm = TRUE)
  }
  # NOTE(review): rbind inside a loop grows quadratically; acceptable here
  # since there are only a handful of years
  output <- rbind(output, data.frame(
    year = y,
    n_images = n_images,
    n_weeks_missing = n_weeks_missing,
    max_consec_weeks_missing = max_consec,
    avg_images_per_week = round(avg_images_per_week, 2)
  ))
  if (n_weeks_missing > 0) {
    missing_weeks_list[[as.character(y)]] <- weeks_missing
  }
}

# Write to CSV
print(output)
write.csv(output, file = "image_availability_by_year.csv", row.names = FALSE)

# Print missing weeks for years with missing data
for (y in names(missing_weeks_list)) {
  cat(sprintf("Year %s missing weeks: %s\n", y, paste(missing_weeks_list[[y]], collapse=", ")))
}

# Calculate and print max consecutive weeks with only incomplete data per year
cat("\nMax consecutive weeks with only incomplete images per year:\n")
for (y in years) {
  if (y == current_year) {
    all_weeks <- 1:current_week
  } else {
    all_weeks <- 1:52
  }
  # Weeks where all images are incomplete (no complete images)
  weeks_incomplete <- plot_weeks$week[plot_weeks$year == y & plot_weeks$completeness == "Complete" & plot_weeks$n_images == 0]
  # Only keep weeks that actually have at least one image (i.e., not missing entirely)
  weeks_with_any_image <- unique(df$week[df$year == y])
  weeks_incomplete <- intersect(weeks_incomplete, weeks_with_any_image)
  if (length(weeks_incomplete) == 0) {
    max_consec_incomplete <- 0
  } else {
    w <- sort(weeks_incomplete)
    # Same run-length trick as above: longest stretch of consecutive
    # incomplete-only weeks
    runs <- rle(c(1, diff(w)) == 1)
    max_consec_incomplete <- max(runs$lengths[runs$values], na.rm = TRUE)
  }
  cat(sprintf("Year %d: %d\n", y, max_consec_incomplete))
}

View file

@ -1,82 +0,0 @@
import torch
import torch.nn as nn
import time
print("=" * 80)
print("PYTORCH GPU vs CPU BENCHMARK TEST")
print("=" * 80)
# Model definition
class SimpleModel(nn.Module):
    """Three-layer fully connected MLP: 784 -> 1000 -> 1000 -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Attribute names kept identical so state_dict keys are unchanged.
        self.fc1 = nn.Linear(784, 1000)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc3 = nn.Linear(1000, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits for a (batch, 784) input tensor."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Dummy data - larger dataset
x = torch.randn(100000, 784)
y = torch.randint(0, 10, (100000,))

# Loss function
criterion = nn.CrossEntropyLoss()

print("\n1. GPU TRAINING")
print("-" * 80)

# NOTE(review): this section assumes a CUDA device is present and will raise
# if torch.cuda.is_available() is False.
model_gpu = SimpleModel().cuda()  # Move to GPU
optimizer_gpu = torch.optim.Adam(model_gpu.parameters())
x_gpu = x.cuda()
y_gpu = y.cuda()

print(f"Device: {next(model_gpu.parameters()).device}")
print(f"GPU Memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# CUDA kernels launch asynchronously: synchronize before and after the timed
# region, otherwise time.time() measures mostly launch overhead and
# under-reports GPU time (the original did not synchronize).
torch.cuda.synchronize()
start_time = time.time()

for epoch in range(20):
    optimizer_gpu.zero_grad()
    outputs = model_gpu(x_gpu)
    loss = criterion(outputs, y_gpu)
    loss.backward()
    optimizer_gpu.step()
    if (epoch + 1) % 5 == 0:
        print(f" Epoch {epoch+1}/20 - Loss: {loss.item():.4f}")

torch.cuda.synchronize()
gpu_time = time.time() - start_time
print(f"\nGPU training time: {gpu_time:.2f} seconds")

print("\n2. CPU TRAINING")
print("-" * 80)

model_cpu = SimpleModel().cpu()  # Stay on CPU
optimizer_cpu = torch.optim.Adam(model_cpu.parameters())
x_cpu = x.cpu()
y_cpu = y.cpu()

print(f"Device: {next(model_cpu.parameters()).device}")

# CPU ops are synchronous; plain wall-clock timing is sufficient here.
start_time = time.time()
for epoch in range(20):
    optimizer_cpu.zero_grad()
    outputs = model_cpu(x_cpu)
    loss = criterion(outputs, y_cpu)
    loss.backward()
    optimizer_cpu.step()
    if (epoch + 1) % 5 == 0:
        print(f" Epoch {epoch+1}/20 - Loss: {loss.item():.4f}")
cpu_time = time.time() - start_time
print(f"\nCPU training time: {cpu_time:.2f} seconds")

print("\n" + "=" * 80)
print("RESULTS")
print("=" * 80)
print(f"GPU time: {gpu_time:.2f} seconds")
print(f"CPU time: {cpu_time:.2f} seconds")
print(f"Speedup: {cpu_time / gpu_time:.1f}x faster on GPU")
print("=" * 80)

View file

@ -1,207 +0,0 @@
# SmartCane Repository Cleanup Script
# This script will delete unnecessary files and move experimental scripts
# Review this script before running: .\cleanup_repo.ps1

Write-Host "🧹 SmartCane Repository Cleanup" -ForegroundColor Cyan
Write-Host "================================" -ForegroundColor Cyan
Write-Host ""

$deletedCount = 0
$movedCount = 0
$errors = @()

# Delete every path in $Paths, reporting each one; returns the number of files
# actually deleted. Failures are appended to the script-level $errors
# collection — the original declared $errors and reported it in the summary
# but never populated it, so Remove-Item failures went unnoticed.
function Remove-FileList {
    param([string[]]$Paths)
    $count = 0
    foreach ($file in $Paths) {
        if (Test-Path $file) {
            try {
                Remove-Item $file -Force -ErrorAction Stop
                Write-Host " ✓ Deleted: $file" -ForegroundColor Green
                $count++
            } catch {
                $script:errors += "Failed to delete ${file}: $_"
                Write-Host " ✗ Failed: $file" -ForegroundColor Red
            }
        } else {
            Write-Host " ⚠ Not found: $file" -ForegroundColor DarkGray
        }
    }
    return $count
}

# Move each @{Source=...; Dest=...} entry in $Moves, reporting each one;
# returns the number of files actually moved. Failures go to $errors.
function Move-FileList {
    param([hashtable[]]$Moves)
    $count = 0
    foreach ($file in $Moves) {
        if (Test-Path $file.Source) {
            try {
                Move-Item $file.Source $file.Dest -Force -ErrorAction Stop
                Write-Host " ✓ Moved: $($file.Source) -> $($file.Dest)" -ForegroundColor Green
                $count++
            } catch {
                $script:errors += "Failed to move $($file.Source): $_"
                Write-Host " ✗ Failed: $($file.Source)" -ForegroundColor Red
            }
        } else {
            Write-Host " ⚠ Not found: $($file.Source)" -ForegroundColor DarkGray
        }
    }
    return $count
}

# ============================================================================
# PART 1: DELETE FILES
# ============================================================================
Write-Host "📁 PART 1: Deleting files..." -ForegroundColor Yellow
Write-Host ""

# A) Test & Debug Scripts
$testFiles = @(
    "r_app/test_benchmarks.R",
    "r_app/test_harvest.R",
    "r_app/test_kpis_esa.R",
    "r_app/debug_kpis.R",
    "r_app/quick_layout_test.R",
    "r_app/run_minimal_test.R"
)
Write-Host "Deleting test and debug scripts..." -ForegroundColor Gray
$deletedCount += Remove-FileList $testFiles

# B) Output Files (.Rout)
$routFiles = @(
    "r_app/02_ci_extraction.Rout",
    "r_app/03_interpolate_growth_model.Rout",
    "r_app/04_mosaic_creation.Rout"
)
Write-Host "`nDeleting .Rout files..." -ForegroundColor Gray
$deletedCount += Remove-FileList $routFiles

# C) Temporary PDF Files
$pdfFiles = @(
    "Rplots.pdf",
    "r_app/Rplots.pdf"
)
Write-Host "`nDeleting temporary PDF files..." -ForegroundColor Gray
$deletedCount += Remove-FileList $pdfFiles

# D) Old/Deprecated Scripts
$oldScripts = @(
    "r_app/ci_extraction.R",
    "r_app/interpolate_growth_model.R",
    "r_app/mosaic_creation.R",
    "r_app/installPackages.R",
    "r_app/packages.R",
    "generated_package_config.R"
)
Write-Host "`nDeleting old/deprecated scripts..." -ForegroundColor Gray
$deletedCount += Remove-FileList $oldScripts

# E) Generated Word Documents
$wordDocs = @(
    "r_app/CI_report.docx",
    "r_app/CI_report2.docx",
    "r_app/CI_report_age_filtered.docx",
    "r_app/CI_report_last_week.docx",
    "r_app/CI_report_week38_corrected.docx",
    "r_app/CI_report_with_kpis_aura.docx",
    "r_app/CI_report_with_kpis_esa.docx",
    "r_app/05_CI_report_dashboard_planet.docx",
    "r_app/10_CI_report_with_kpis_simple.docx",
    "r_app/script5_test.docx",
    "r_app/test_kpi_grid.docx",
    "r_app/output/aura/crop_analysis_AURA_w36vs35_20250916_1631.docx",
    "r_app/output/reports/CI_report_with_kpis_simple_test.docx",
    "r_app/output/CI_report_2x3_layout.docx",
    "r_app/output/CI_report_consolidated.docx",
    "r_app/output/CI_report_layout_test.docx",
    "r_app/output/test_clean.docx",
    "r_app/output/test_grid.docx",
    "r_app/output/test_kables.docx",
    "r_app/output/test_merged.docx"
)
Write-Host "`nDeleting generated Word documents (keeping word-styles-reference-var1.docx)..." -ForegroundColor Gray
$deletedCount += Remove-FileList $wordDocs

# ============================================================================
# PART 2: MOVE FILES TO EXPERIMENTS
# ============================================================================
Write-Host "`n`n📁 PART 2: Moving files to experiments..." -ForegroundColor Yellow
Write-Host ""

# Create destination directories
$destDirs = @(
    "r_app/experiments/reports",
    "r_app/experiments/legacy_package_management"
)
foreach ($dir in $destDirs) {
    if (!(Test-Path $dir)) {
        New-Item -ItemType Directory -Path $dir -Force | Out-Null
        Write-Host " Created directory: $dir" -ForegroundColor Cyan
    }
}

# Move experimental Rmd files
$rmdFiles = @(
    @{Source="r_app/CI_report_dashboard_planet.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/CI_report_dashboard_planet_enhanced.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/CI_report_executive_summary.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/simple_kpi_report.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/test_kpi_grid.Rmd"; Dest="r_app/experiments/reports/"},
    @{Source="r_app/test_minimal.Rmd"; Dest="r_app/experiments/reports/"}
)
Write-Host "Moving experimental Rmd files..." -ForegroundColor Gray
$movedCount += Move-FileList $rmdFiles

# Move legacy package management scripts
$legacyFiles = @(
    @{Source="r_app/extract_current_versions.R"; Dest="r_app/experiments/legacy_package_management/"},
    @{Source="r_app/package_manager.R"; Dest="r_app/experiments/legacy_package_management/"}
)
Write-Host "`nMoving legacy package management scripts..." -ForegroundColor Gray
$movedCount += Move-FileList $legacyFiles

# ============================================================================
# SUMMARY
# ============================================================================
Write-Host "`n`n📊 CLEANUP SUMMARY" -ForegroundColor Cyan
Write-Host "==================" -ForegroundColor Cyan
Write-Host "Files deleted: $deletedCount" -ForegroundColor Green
Write-Host "Files moved: $movedCount" -ForegroundColor Green

if ($errors.Count -gt 0) {
    Write-Host "`n⚠️ Errors encountered: $($errors.Count)" -ForegroundColor Red
    foreach ($err in $errors) {
        Write-Host " $err" -ForegroundColor Red
    }
}

Write-Host "`n✅ Cleanup completed!" -ForegroundColor Green
Write-Host "`nNext step: Update .gitignore (see instructions)" -ForegroundColor Yellow

View file

@ -1,177 +0,0 @@
#!/usr/bin/env python3
"""
CONVERT_ANGATA_HARVEST.PY
=========================
Converts Angata harvest data from its received format to the standardized SmartCane format.
Input format (as received from Angata):
Contract No | Field No | dop/doh
0001 | 1 | 01/06/2023
Output format (SmartCane standard, matching Aura):
field | sub_field | year | season_start | season_end | age | sub_area | tonnage_ha
The script:
1. Reads Angata harvest.xlsx
2. Extracts field numbers and dates
3. Creates field names from field numbers (e.g., "Field_1", "Field_2", etc.)
4. Extracts year from date
5. Uses dop/doh as season_start (other fields left as NaN for now)
6. Writes output to harvest.xlsx in SmartCane format
Usage:
python convert_angata_harvest.py
"""
import pandas as pd
import os
from datetime import datetime
from pathlib import Path
def convert_angata_harvest(input_file=None, output_file=None):
    """Convert Angata harvest data to the standardized SmartCane format.

    Reads every sheet of the received Angata workbook, extracts the
    ``Field No`` and ``dop/doh`` columns, and writes a workbook laid out in
    the SmartCane standard (matching Aura): ``field, sub_field, year,
    season_start, season_end, age, sub_area, tonnage_ha``.

    Args:
        input_file: Workbook to read. Defaults to
            ``laravel_app/storage/app/angata/Data/harvest.xlsx``.
        output_file: Workbook to write. Defaults to the same path as the
            input, i.e. the received file is overwritten in place (the
            original behavior of this script).

    Returns:
        pandas.DataFrame: The converted table, also saved to ``output_file``.

    Raises:
        ValueError: If no sheet contains usable data, or a required column
            is missing after combining the sheets.
    """
    # Default paths point at the Angata storage directory. Note that by
    # default input and output are the SAME file: the received workbook is
    # replaced with the converted one. Pass explicit paths to avoid that.
    angata_dir = Path("laravel_app/storage/app/angata/Data")
    if input_file is None:
        input_file = angata_dir / "harvest.xlsx"
    if output_file is None:
        output_file = angata_dir / "harvest.xlsx"

    # Read all sheets from the input file.
    print(f"Reading Angata harvest data from: {input_file}")
    xls = pd.ExcelFile(input_file)
    print(f"Sheet names found: {xls.sheet_names}")

    # Collect usable rows from every sheet.
    all_data = []
    for sheet_name in xls.sheet_names:
        print(f"\nProcessing sheet: {sheet_name}")
        df = pd.read_excel(input_file, sheet_name=sheet_name)
        # Remove any completely empty rows.
        df = df.dropna(how='all')
        if len(df) == 0:
            print(f"  Sheet {sheet_name} is empty, skipping")
            continue
        # A sheet is usable only if it carries the field-number column ...
        if 'Field No' not in df.columns:
            print(f"  Sheet {sheet_name} does not have 'Field No' column, skipping")
            continue
        # ... and a date column, which appears under two spellings.
        date_col = next(
            (c for c in ('dop/doh', 'doh/dop') if c in df.columns), None
        )
        if date_col is None:
            print(f"  Sheet {sheet_name} does not have date column (dop/doh or doh/dop), skipping")
            continue
        # Standardize date column name to 'dop/doh' for consistency.
        df = df.rename(columns={date_col: 'dop/doh'})
        # Drop rows whose field number is missing/garbage.
        df = df[pd.notna(df['Field No'])]
        print(f"  Loaded {len(df)} records from {sheet_name}")
        all_data.append(df)

    # Combine all sheets into one frame.
    if not all_data:
        raise ValueError("No valid data found in any sheet")
    print(f"\nCombining data from {len(all_data)} sheets...")
    df = pd.concat(all_data, ignore_index=True)
    df = df.dropna(how='all')  # Remove empty rows after concat
    df = df[pd.notna(df['Field No'])]  # Ensure no NaN field numbers
    print(f"Total records after combining: {len(df)}")

    # Defensive re-validation (both columns were checked per sheet above).
    for col in ('Field No', 'dop/doh'):
        if col not in df.columns:
            raise ValueError(f"Missing required column: {col}")

    # Build the output frame.
    converted = pd.DataFrame()
    # Field name = field number rendered as a string (e.g., "1", "2", "10").
    converted['field'] = df['Field No'].astype(str)
    # Sub-field is the same as field (Angata has no sub-field concept).
    converted['sub_field'] = converted['field']

    # Parse dop/doh dates - format is DD/MM/YYYY. A row-wise loop is kept
    # (instead of a vectorized to_datetime) so each bad value is reported
    # individually without aborting the whole run.
    print("\nParsing dates...")
    dates = []
    years = []
    for idx, date_str in enumerate(df['dop/doh']):
        try:
            if pd.isna(date_str):
                dates.append(pd.NaT)
                years.append(None)
            else:
                date_obj = pd.to_datetime(date_str, format='%d/%m/%Y')
                dates.append(date_obj)
                years.append(int(date_obj.year))
        except Exception as e:
            print(f"Warning: Could not parse date at row {idx}: {date_str} - {e}")
            dates.append(pd.NaT)
            years.append(None)
    # Sanity check: exactly one parsed value per input row.
    assert len(dates) == len(df), f"Date list length {len(dates)} != DataFrame length {len(df)}"
    assert len(years) == len(df), f"Years list length {len(years)} != DataFrame length {len(df)}"

    converted['season_start'] = dates
    # Nullable integer dtype keeps missing years as <NA> instead of floats
    # (replaces the previous redundant apply(int) + astype('Int64') pair).
    converted['year'] = pd.array(years, dtype='Int64')

    # Fields not provided in the Angata data: season_end is left empty (to
    # be filled in by downstream scripts); the rest are unknown here.
    converted['season_end'] = ""
    converted['age'] = None
    converted['sub_area'] = None
    converted['tonnage_ha'] = None

    # Reorder columns to match the Aura format.
    converted = converted[['field', 'sub_field', 'year', 'season_start', 'season_end', 'age', 'sub_area', 'tonnage_ha']]

    # Display a conversion summary for the operator.
    print("\nConversion summary:")
    print(f"  Total records: {len(converted)}")
    print(f"  Date range: {converted['season_start'].min()} to {converted['season_start'].max()}")
    print(f"  Years: {sorted(converted['year'].dropna().unique())}")
    print(f"\nFirst 10 rows:")
    print(converted.head(10))

    # Save to Excel.
    print(f"\nSaving converted data to: {output_file}")
    converted.to_excel(output_file, index=False, sheet_name='Harvest')
    print("Conversion complete!")
    return converted
# Script entry point: run the conversion and report the outcome.
if __name__ == "__main__":
    try:
        result = convert_angata_harvest()
    except Exception as e:
        # Surface the failure plus a full traceback for debugging.
        print(f"\nError during conversion: {e}")
        import traceback
        traceback.print_exc()
    else:
        print("\nSuccess! Angata harvest data has been converted to SmartCane format.")

View file

@ -1,212 +0,0 @@
# SmartCane Data Validation Tool
A standalone, client-side data validation tool for validating Excel harvest data and GeoJSON field boundaries before uploading to the SmartCane system.
## Features
### 🚦 Traffic Light System
- **🟢 GREEN**: All checks passed
- **🟡 YELLOW**: Warnings detected (non-critical issues)
- **🔴 RED**: Errors detected (blocking issues)
### ✅ Validation Checks
1. **Excel Column Validation**
   - Checks for the 6 required columns (`field`, `sub_field`, `year`, `season_start`, `season_end`, `tonnage_ha`) and recognizes the optional `age` and `sub_area` columns
- Identifies extra columns that will be ignored
- Shows missing columns that must be added
2. **GeoJSON Properties Validation**
- Checks all features have required properties: `field`, `sub_field`
- Identifies redundant properties that will be ignored
3. **Coordinate Reference System (CRS)**
- Validates correct CRS: **EPSG:32736 (UTM Zone 36S)**
- This CRS was validated from your Angata farm coordinates
- Explains why this specific CRS is required
4. **Field Name Matching**
- Compares field names between Excel and GeoJSON
- Shows which fields exist in only one dataset
- Highlights misspellings or missing fields
- Provides complete matching summary table
5. **Data Type & Content Validation**
- Checks column data types:
- `year`: Must be integer
- `season_start`, `season_end`: Must be valid dates
- `age`, `sub_area`, `tonnage_ha`: Must be numeric (decimal)
- Identifies rows with missing `season_start` dates
- Flags invalid date formats and numeric values
## File Requirements
### Excel File (harvest.xlsx)
```
| field | sub_field | year | season_start | season_end | age | sub_area | tonnage_ha |
|----------|------------------|------|--------------|------------|-----|----------|-----------|
| kowawa | kowawa | 2023 | 2023-01-15 | 2024-01-14 | 1.5 | 45 | 125.5 |
| Tamu | Tamu Upper | 2023 | 2023-02-01 | 2024-01-31 | 1.0 | 30 | 98.0 |
```
**Data Types:**
- `field`, `sub_field`: Text (can be numeric as text)
- `year`: Integer
- `season_start`, `season_end`: Date (YYYY-MM-DD format)
- `age`, `sub_area`, `tonnage_ha`: Decimal/Float
**Extra columns** are allowed but will not be processed.
### GeoJSON File (pivot.geojson)
```json
{
"type": "FeatureCollection",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:EPSG::32736"
}
},
"features": [
{
"type": "Feature",
"properties": {
"field": "kowawa",
"sub_field": "kowawa"
},
"geometry": {
"type": "MultiPolygon",
"coordinates": [...]
}
}
]
}
```
**Required Properties:**
- `field`: Field identifier (must match Excel)
- `sub_field`: Sub-field identifier (must match Excel)
**Optional Properties:**
- `STATUS`, `name`, `age`, etc. - These are allowed but not required
**CRS:**
- Must be EPSG:32736 (UTM Zone 36S)
- This was determined from analyzing your Angata farm coordinates
## Deployment
### Local Use (Recommended for Security)
1. Download the `data_validation_tool` folder
2. Open `index.html` in a web browser
3. Files are processed entirely client-side - no data is sent to servers
### Netlify Deployment
1. Connect to your GitHub repository
2. Set build command: `None`
3. Set publish directory: `data_validation_tool`
4. Deploy
Or use Netlify CLI:
```bash
npm install -g netlify-cli
netlify deploy --dir data_validation_tool
```
### Manual Testing
1. Use the provided sample files:
- Excel: `laravel_app/storage/app/aura/Data/harvest.xlsx`
- GeoJSON: `laravel_app/storage/app/aura/Data/pivot.geojson`
2. Open `index.html`
3. Upload both files
4. Review validation results
## Technical Details
### Browser Requirements
- Modern browser with ES6 support (Chrome, Firefox, Safari, Edge)
- Must support FileReader API and JSON parsing
- Requires XLSX library for Excel parsing
### Dependencies
- **XLSX.js**: For reading Excel files (loaded via CDN in index.html)
### What Happens When You Upload
1. File is read into memory (client-side only)
2. Excel: Parsed using XLSX library into JSON
3. GeoJSON: Parsed directly as JSON
4. All validation runs in your browser
5. Results displayed locally
6. **No files are sent to any server**
## Validation Rules
### Traffic Light Logic
**All GREEN (✓ Passed)**
- All required columns/properties present
- Correct CRS
- All field names match
- All data types valid
**YELLOW (⚠️ Warnings)**
- Extra columns detected (will be ignored)
- Extra properties detected (will be ignored)
- Missing dates in some fields
- Data type issues in specific rows
**RED (✗ Failed)**
- Missing required columns/properties
- Wrong CRS
- Field names mismatch between files
- Fundamental data structure issues
### CRS Explanation
From your project's geospatial analysis:
- **Original issue**: Angata farm GeoJSON had coordinates in UTM Zone 37S but marked as WGS84
- **Root cause**: UTM Zone mismatch - farm is actually in UTM Zone 36S
- **Solution**: Reproject to EPSG:32736 (UTM Zone 36S)
- **Why**: This aligns with actual Angata farm coordinates (longitude ~34.4°E)
## Troubleshooting
### "Failed to read Excel file"
- Ensure file is `.xlsx` format
- File should not be open in Excel while uploading
- Try saving as Excel 2007+ format
### "Failed to parse GeoJSON"
- Ensure file is valid JSON
- Check for syntax errors (extra commas, missing brackets)
- Use online JSON validator at jsonlint.com
### "Wrong CRS detected"
- GeoJSON must explicitly state CRS as EPSG:32736
- Example: `"name": "urn:ogc:def:crs:EPSG::32736"`
- Reproject in QGIS or R if needed
### "Field names don't match"
- Check for typos and capitalization differences
- Spaces at beginning/end of field names
- Use field names exactly as they appear in both files
## Future Enhancements
- [ ] Download validation report as PDF
- [ ] Batch upload multiple Excel/GeoJSON pairs
- [ ] Auto-detect and suggest field mappings
- [ ] Geometry validity checks (self-intersecting polygons)
- [ ] Area comparison between Excel and GeoJSON
- [ ] Export cleaned/standardized files
## Support
For questions about data validation requirements, contact the SmartCane team.
---
**Tool Version**: 1.0
**Last Updated**: December 2025
**CRS Reference**: EPSG:32736 (UTM Zone 36S)

View file

@ -1,396 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SmartCane Data Validation Tool</title>
    <!-- All styles are inlined so the tool runs as a single self-contained page -->
    <style>
        /* Global reset */
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            padding: 20px;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        header {
            background: white;
            padding: 30px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            text-align: center;
        }
        h1 {
            color: #333;
            margin-bottom: 10px;
        }
        .subtitle {
            color: #666;
            font-size: 14px;
        }
        /* Two-column upload area (collapses to one column on small screens) */
        .upload-section {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 20px;
            margin-bottom: 20px;
        }
        .upload-card {
            background: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .upload-card h2 {
            font-size: 18px;
            color: #333;
            margin-bottom: 15px;
            display: flex;
            align-items: center;
            gap: 10px;
        }
        .file-icon {
            font-size: 24px;
        }
        /* Drop zone: the label is the visible target, the input is hidden */
        .file-input-wrapper {
            position: relative;
            display: inline-block;
            width: 100%;
        }
        .file-input-label {
            display: block;
            padding: 20px;
            border: 2px dashed #667eea;
            border-radius: 6px;
            text-align: center;
            cursor: pointer;
            transition: all 0.3s;
            background: #f8f9ff;
        }
        .file-input-label:hover {
            border-color: #764ba2;
            background: #f0f1ff;
        }
        .file-input-wrapper input[type="file"] {
            display: none;
        }
        .file-name {
            margin-top: 10px;
            font-size: 14px;
            color: #667eea;
            font-weight: 500;
        }
        /* Results panel is hidden until validation runs (.show toggles it) */
        .results-section {
            background: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            display: none;
            max-width: 100%;
        }
        .results-section.show {
            display: block;
        }
        .results-section h2 {
            color: #333;
            margin-bottom: 25px;
            padding-bottom: 15px;
            border-bottom: 3px solid #667eea;
        }
        /* Traffic-light summary cards (pass / warning / fail) */
        .traffic-light {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
            margin-bottom: 30px;
        }
        .check-item {
            padding: 20px;
            border-radius: 8px;
            display: flex;
            align-items: center;
            gap: 12px;
            font-weight: 500;
            border-left: 4px solid;
        }
        .check-item.pass {
            background: #d4edda;
            color: #155724;
            border-left-color: #28a745;
        }
        .check-item.warning {
            background: #fff3cd;
            color: #856404;
            border-left-color: #ffc107;
        }
        .check-item.fail {
            background: #f8d7da;
            color: #721c24;
            border-left-color: #dc3545;
        }
        /* Emoji traffic lights rendered via ::before so markup stays simple */
        .light {
            font-size: 24px;
            flex-shrink: 0;
        }
        .light.green::before { content: "🟢"; }
        .light.yellow::before { content: "🟡"; }
        .light.red::before { content: "🔴"; }
        .details-section {
            margin-top: 30px;
            border-top: 1px solid #eee;
            padding-top: 20px;
        }
        .details-section h3 {
            font-size: 16px;
            color: #333;
            margin-bottom: 15px;
            padding-bottom: 10px;
            border-bottom: 2px solid #667eea;
            margin-top: 25px;
        }
        .details-section > div:first-child h3 {
            margin-top: 0;
        }
        /* Colored callout boxes used inside the details section */
        .message-box {
            padding: 15px;
            margin-bottom: 15px;
            border-radius: 6px;
            font-size: 14px;
            line-height: 1.5;
        }
        .message-box.error {
            background: #f8d7da;
            color: #721c24;
            border-left: 4px solid #dc3545;
        }
        .message-box.warning {
            background: #fff3cd;
            color: #856404;
            border-left: 4px solid #ffc107;
        }
        .message-box.info {
            background: #d1ecf1;
            color: #0c5460;
            border-left: 4px solid #17a2b8;
        }
        .message-box.success {
            background: #d4edda;
            color: #155724;
            border-left: 4px solid #28a745;
        }
        /* Field-matching summary table */
        table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 15px;
            font-size: 14px;
        }
        th {
            background: #667eea;
            color: white;
            padding: 12px;
            text-align: left;
            font-weight: 600;
        }
        td {
            padding: 10px 12px;
            border-bottom: 1px solid #eee;
        }
        tr:hover {
            background: #f8f9ff;
        }
        .match {
            color: #28a745;
            font-weight: 500;
        }
        .mismatch {
            color: #dc3545;
            font-weight: 500;
        }
        .missing {
            color: #ffc107;
            font-weight: 500;
        }
        .field-list {
            display: grid;
            grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
            gap: 10px;
            margin-top: 15px;
        }
        .field-badge {
            background: #e9ecef;
            padding: 8px 12px;
            border-radius: 4px;
            font-size: 13px;
            border-left: 3px solid;
        }
        .field-badge.missing {
            background: #fff3cd;
            border-left-color: #ffc107;
            color: #856404;
        }
        .field-badge.extra {
            background: #d1ecf1;
            border-left-color: #17a2b8;
            color: #0c5460;
        }
        .validation-row {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 10px;
            margin-top: 15px;
        }
        .validation-item {
            background: #f8f9ff;
            padding: 10px;
            border-radius: 4px;
            font-size: 13px;
            border-left: 3px solid;
        }
        .validation-item.valid {
            border-left-color: #28a745;
        }
        .validation-item.invalid {
            border-left-color: #dc3545;
        }
        /* Stack the layout on narrow viewports */
        @media (max-width: 768px) {
            .upload-section {
                grid-template-columns: 1fr;
            }
            .traffic-light {
                grid-template-columns: 1fr;
            }
        }
        footer {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-top: 20px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            text-align: center;
            font-size: 13px;
            color: #666;
        }
        footer a {
            color: #667eea;
            text-decoration: none;
            font-weight: 600;
        }
        footer a:hover {
            text-decoration: underline;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>🌾 SmartCane Data Validation Tool</h1>
            <p class="subtitle">Validate your Excel and GeoJSON files before uploading to the system</p>
        </header>
        <!-- Upload cards: one for the Excel harvest data, one for the GeoJSON boundaries -->
        <div class="upload-section">
            <div class="upload-card">
                <h2><span class="file-icon">📊</span>Excel File (Harvest Data)</h2>
                <p style="font-size: 13px; color: #666; margin-bottom: 15px;">Required columns: field, sub_field, year, season_start, season_end, age, sub_area, tonnage_ha</p>
                <div class="file-input-wrapper" id="excelDropZone">
                    <label class="file-input-label" for="excelFile">
                        <div>Drop your Excel file here<br><small>or click to browse</small></div>
                        <div class="file-name" id="excelFileName"></div>
                    </label>
                    <input type="file" id="excelFile" accept=".xlsx,.xls" />
                </div>
            </div>
            <div class="upload-card">
                <h2><span class="file-icon">🗺️</span>GeoJSON File (Field Boundaries)</h2>
                <p style="font-size: 13px; color: #666; margin-bottom: 15px;">Required properties: field, sub_field</p>
                <div class="file-input-wrapper" id="geojsonDropZone">
                    <label class="file-input-label" for="geojsonFile">
                        <div>Drop your GeoJSON file here<br><small>or click to browse</small></div>
                        <div class="file-name" id="geojsonFileName"></div>
                    </label>
                    <input type="file" id="geojsonFile" accept=".geojson,.json" />
                </div>
            </div>
        </div>
        <!-- Revealed by validator.js only after both files load successfully -->
        <div style="text-align: center; margin-bottom: 20px;">
            <button id="checkButton" style="padding: 12px 40px; font-size: 16px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border: none; border-radius: 6px; cursor: pointer; font-weight: 600; display: none;">
                ✓ Check Files
            </button>
        </div>
        <!-- Populated by validator.js: traffic lights plus detailed findings -->
        <div class="results-section" id="resultsSection">
            <h2 style="margin-bottom: 20px; color: #333;">Validation Results</h2>
            <div class="traffic-light" id="trafficLight"></div>
            <div class="details-section" id="detailsSection"></div>
        </div>
        <footer>
            SmartCane Data Validation Tool | Learn more at <a href="https://www.smartcane.ag" target="_blank">www.smartcane.ag</a>
        </footer>
    </div>
    <!-- XLSX is the only external dependency; validator.js holds all the logic -->
    <script src="https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js"></script>
    <script src="validator.js"></script>
</body>
</html>

View file

@ -1,698 +0,0 @@
// Configuration
// Validation configuration: required/optional Excel schema, required GeoJSON
// properties, and the expected coordinate reference system.
const CONFIG = {
    REQUIRED_EXCEL_COLUMNS: ['field', 'sub_field', 'year', 'season_start', 'season_end', 'tonnage_ha'],
    OPTIONAL_EXCEL_COLUMNS: ['age', 'sub_area'], // age is calculated in script, sub_area is optional
    REQUIRED_GEOJSON_PROPERTIES: ['field', 'sub_field'],
    VALID_CRS: 'EPSG:32736', // UTM 36S - the correct CRS we learned from the conversation
    CRS_DESCRIPTION: 'EPSG:32736 (UTM Zone 36S) - This is the correct CRS learned from geospatial analysis of Angata farm coordinates'
};
// Parsed file contents and load flags, set by the file handlers below and
// read by the validators.
let excelData = null;
let geojsonData = null;
let excelLoaded = false;
let geojsonLoaded = false;
// File input handlers
document.getElementById('excelFile').addEventListener('change', handleExcelFile);
document.getElementById('geojsonFile').addEventListener('change', handleGeojsonFile);
document.getElementById('checkButton').addEventListener('click', validateData);
// Show the "Check Files" button only once both inputs have loaded cleanly.
function updateCheckButton() {
    const button = document.getElementById('checkButton');
    const bothLoaded = excelLoaded && geojsonLoaded;
    button.style.display = bothLoaded ? 'inline-block' : 'none';
}
// Drag and drop handlers for Excel. The zone highlights on hover; dropped
// files are forwarded through the same change-handler path as a click-pick.
const excelDropZone = document.getElementById('excelDropZone');
excelDropZone.addEventListener('dragover', (e) => {
    e.preventDefault();
    e.stopPropagation();
    excelDropZone.style.backgroundColor = '#f0f1ff'; // hover highlight
});
excelDropZone.addEventListener('dragleave', (e) => {
    e.preventDefault();
    e.stopPropagation();
    excelDropZone.style.backgroundColor = 'transparent';
});
excelDropZone.addEventListener('drop', (e) => {
    e.preventDefault();
    e.stopPropagation();
    excelDropZone.style.backgroundColor = 'transparent';
    const files = e.dataTransfer.files;
    if (files.length > 0) {
        // Mirror the dropped files into the hidden input, then reuse the
        // change handler with a minimal synthetic event.
        document.getElementById('excelFile').files = files;
        handleExcelFile({ target: { files: files } });
    }
});
// Drag and drop handlers for GeoJSON (mirrors the Excel drop-zone wiring).
const geojsonDropZone = document.getElementById('geojsonDropZone');
geojsonDropZone.addEventListener('dragover', (e) => {
    e.preventDefault();
    e.stopPropagation();
    geojsonDropZone.style.backgroundColor = '#f0f1ff'; // hover highlight
});
geojsonDropZone.addEventListener('dragleave', (e) => {
    e.preventDefault();
    e.stopPropagation();
    geojsonDropZone.style.backgroundColor = 'transparent';
});
geojsonDropZone.addEventListener('drop', (e) => {
    e.preventDefault();
    e.stopPropagation();
    geojsonDropZone.style.backgroundColor = 'transparent';
    const files = e.dataTransfer.files;
    if (files.length > 0) {
        // Forward dropped files through the regular change-handler path.
        document.getElementById('geojsonFile').files = files;
        handleGeojsonFile({ target: { files: files } });
    }
});
// Read the selected Excel file entirely client-side and parse its first
// sheet into row objects. On success the rows land in the global
// `excelData`; read/parse failures are shown next to the file name and
// keep the check button hidden via updateCheckButton().
function handleExcelFile(e) {
    const file = e.target.files[0];
    if (!file) return;
    document.getElementById('excelFileName').textContent = `${file.name}`;
    const reader = new FileReader();
    reader.onload = (event) => {
        try {
            const data = new Uint8Array(event.target.result);
            const workbook = XLSX.read(data, { type: 'array' });
            // Only the first sheet is validated.
            const worksheet = workbook.Sheets[workbook.SheetNames[0]];
            excelData = XLSX.utils.sheet_to_json(worksheet);
            excelLoaded = true;
            updateCheckButton();
        } catch (error) {
            document.getElementById('excelFileName').textContent = `✗ Error: ${error.message}`;
            excelLoaded = false;
            updateCheckButton();
        }
    };
    reader.onerror = () => {
        document.getElementById('excelFileName').textContent = `✗ Failed to read file`;
        excelLoaded = false;
        updateCheckButton();
    };
    reader.readAsArrayBuffer(file);
}
// Read and JSON-parse the selected GeoJSON file client-side. On success the
// parsed object lands in the global `geojsonData`; failures are shown next
// to the file name and keep the check button hidden.
function handleGeojsonFile(e) {
    const file = e.target.files[0];
    if (!file) return;
    document.getElementById('geojsonFileName').textContent = `${file.name}`;
    const reader = new FileReader();
    reader.onload = (event) => {
        try {
            geojsonData = JSON.parse(event.target.result);
            geojsonLoaded = true;
            updateCheckButton();
        } catch (error) {
            document.getElementById('geojsonFileName').textContent = `✗ Invalid JSON: ${error.message}`;
            geojsonLoaded = false;
            updateCheckButton();
        }
    };
    reader.onerror = () => {
        document.getElementById('geojsonFileName').textContent = `✗ Failed to read file`;
        geojsonLoaded = false;
        updateCheckButton();
    };
    reader.readAsText(file);
}
// Run every validation check in order and render the combined results.
// Requires both files to be parsed already (guarded with an alert).
function validateData() {
    if (!excelData || !geojsonData) {
        alert('Please upload both Excel and GeoJSON files before checking.');
        return;
    }
    // Checks run in display order.
    const checkFns = [
        validateExcelColumns,      // 1. Excel column validation
        validateGeojsonProperties, // 2. GeoJSON properties validation
        validateCRS,               // 3. CRS validation
        validateFieldMatching,     // 4. Field name matching
        validateDataTypes          // 5. Data type and content validation
    ];
    const results = { checks: [], details: [] };
    for (const runCheck of checkFns) {
        const outcome = runCheck();
        results.checks.push(outcome);
        results.details.push(outcome.details);
    }
    displayResults(results);
}
// Check the Excel header row (keys of the first parsed row) against the
// required/optional column sets from CONFIG.
function validateExcelColumns() {
    const headerRow = excelData[0] || {};
    const present = Object.keys(headerRow);
    const isPresent = (col) => present.includes(col);

    const missing = CONFIG.REQUIRED_EXCEL_COLUMNS.filter((col) => !isPresent(col));
    const hasOptional = CONFIG.OPTIONAL_EXCEL_COLUMNS.filter((col) => isPresent(col));
    const extra = present.filter(
        (col) =>
            !CONFIG.REQUIRED_EXCEL_COLUMNS.includes(col) &&
            !CONFIG.OPTIONAL_EXCEL_COLUMNS.includes(col)
    );

    // Missing columns are blocking; unrecognized extras only warn.
    let status = 'pass';
    let message = 'All required columns present';
    if (missing.length > 0) {
        status = 'fail';
        message = `Missing required columns: ${missing.join(', ')}`;
    } else if (extra.length > 0) {
        status = 'warning';
        message = `Extra columns detected (will be ignored): ${extra.join(', ')}`;
    }

    return {
        name: 'Excel Columns',
        status: status,
        message: message,
        details: {
            title: 'Excel Column Validation',
            type: 'columns',
            required: CONFIG.REQUIRED_EXCEL_COLUMNS,
            optional: CONFIG.OPTIONAL_EXCEL_COLUMNS,
            found: present,
            missing: missing,
            hasOptional: hasOptional,
            extra: extra
        }
    };
}
// Validate that every GeoJSON feature carries the required properties
// (field, sub_field) and report any extra ones. Missing properties are
// blocking; extras only warn.
function validateGeojsonProperties() {
    if (!geojsonData.features || geojsonData.features.length === 0) {
        return {
            name: 'GeoJSON Properties',
            status: 'fail',
            message: 'GeoJSON has no features',
            details: {
                title: 'GeoJSON Property Validation',
                type: 'properties',
                error: 'No features found in GeoJSON'
            }
        };
    }
    const allProperties = new Set();
    const missingInFeatures = [];
    geojsonData.features.forEach((feature, idx) => {
        const props = feature.properties || {};
        Object.keys(props).forEach(p => allProperties.add(p));
        CONFIG.REQUIRED_GEOJSON_PROPERTIES.forEach(reqProp => {
            // Treat only undefined/null/empty-string as missing. The previous
            // truthiness test (!props[reqProp]) wrongly flagged legitimate
            // falsy values such as a field numbered 0.
            const value = props[reqProp];
            if (value === undefined || value === null || value === '') {
                missingInFeatures.push({ feature: idx, property: reqProp, field: props.field || 'Unknown' });
            }
        });
    });
    const extra = Array.from(allProperties).filter(p => !CONFIG.REQUIRED_GEOJSON_PROPERTIES.includes(p));
    let status = 'pass';
    let message = 'All required properties present in all features';
    if (missingInFeatures.length > 0) {
        status = 'fail';
        message = `Missing properties in ${missingInFeatures.length} feature(s)`;
    } else if (extra.length > 0) {
        status = 'warning';
        message = `Extra properties detected: ${extra.join(', ')}`;
    }
    return {
        name: 'GeoJSON Properties',
        status: status,
        message: message,
        details: {
            title: 'GeoJSON Property Validation',
            type: 'properties',
            required: CONFIG.REQUIRED_GEOJSON_PROPERTIES,
            found: Array.from(allProperties),
            extra: extra,
            missingInFeatures: missingInFeatures
        }
    };
}
// Check the GeoJSON's declared CRS against the expected EPSG:32736.
// A CRS object that is absent or not of type 'name' counts as "not
// specified", which fails the check.
function validateCRS() {
    const crs = geojsonData.crs;
    let detectedCRS = 'Not specified';
    let status = 'fail';
    let message = `CRS not specified. Expected: ${CONFIG.VALID_CRS}`;

    // Accept the EPSG code itself, or any name mentioning UTM and zone 36.
    const looksLikeUtm36 = (name) =>
        name.includes('32736') || (name.includes('UTM') && name.includes('36'));

    if (crs && crs.type === 'name' && crs.properties?.name) {
        detectedCRS = crs.properties.name;
        if (looksLikeUtm36(detectedCRS)) {
            status = 'pass';
            message = `✓ Correct CRS detected: ${detectedCRS}`;
        } else {
            status = 'fail';
            message = `Wrong CRS: ${detectedCRS}. Expected: ${CONFIG.VALID_CRS}`;
        }
    }

    return {
        name: 'Coordinate Reference System',
        status: status,
        message: message,
        details: {
            title: 'CRS Validation',
            type: 'crs',
            expected: CONFIG.VALID_CRS,
            description: CONFIG.CRS_DESCRIPTION,
            detected: detectedCRS,
            crsObject: crs
        }
    };
}
// Cross-check field names between the Excel rows and the GeoJSON features.
// Names are compared after String() coercion and trimming; any name present
// in only one of the two files fails the check.
function validateFieldMatching() {
    const normalize = (value) => String(value).trim();
    const excelFields = new Set(excelData.map(row => normalize(row.field)));
    const geojsonFields = new Set(geojsonData.features.map(f => normalize(f.properties.field)));

    const matchingFields = [...excelFields].filter(f => geojsonFields.has(f));
    const excelOnly = [...excelFields].filter(f => !geojsonFields.has(f));
    const geojsonOnly = [...geojsonFields].filter(f => !excelFields.has(f));

    const mismatched = excelOnly.length > 0 || geojsonOnly.length > 0;
    const status = mismatched ? 'fail' : 'pass';
    const message = mismatched
        ? `Field name mismatches detected: ${excelOnly.length} in Excel only, ${geojsonOnly.length} in GeoJSON only`
        : 'All field names match between Excel and GeoJSON';

    // One table row per field name, recording where it was found.
    const matchingTable = [];
    excelFields.forEach(field => {
        const inGeojson = geojsonFields.has(field);
        matchingTable.push({
            field: field,
            excel: true,
            geojson: inGeojson,
            status: inGeojson ? 'match' : 'mismatch'
        });
    });
    geojsonOnly.forEach(field => {
        matchingTable.push({
            field: field,
            excel: false,
            geojson: true,
            status: 'mismatch'
        });
    });

    return {
        name: 'Field Name Matching',
        status: status,
        message: message,
        details: {
            title: 'Field Name Matching',
            type: 'fieldMatching',
            matching: matchingFields,
            excelOnly: excelOnly,
            geojsonOnly: geojsonOnly,
            matchingTable: matchingTable
        }
    };
}
// Row-by-row content checks: missing/unparseable season_start dates,
// non-integer years and non-numeric tonnage values. Any findings downgrade
// the check to a warning (never a hard failure).
function validateDataTypes() {
    const missingDates = [];
    const invalidYears = [];   // collects both bad years and unparseable dates
    const invalidNumerics = [];

    excelData.forEach((row, idx) => {
        const excelRow = idx + 2; // +2: 1-based rows plus the header row
        // season_start must be present and parseable.
        if (!row.season_start || row.season_start === '') {
            missingDates.push({ row: excelRow, field: row.field, column: 'season_start' });
        } else if (!isValidDate(row.season_start)) {
            invalidYears.push({ row: excelRow, field: row.field, column: 'season_start', value: row.season_start });
        }
        // year must be a whole number.
        if (!Number.isInteger(parseFloat(row.year))) {
            invalidYears.push({ row: excelRow, field: row.field, column: 'year', value: row.year });
        }
        // Check numeric columns (age is optional, sub_area is text, not numeric)
        for (const col of ['tonnage_ha']) {
            const val = row[col];
            if (val !== '' && val !== null && isNaN(parseFloat(val))) {
                invalidNumerics.push({ row: excelRow, field: row.field, column: col, value: val });
            }
        }
    });

    const issueCount = missingDates.length + invalidYears.length + invalidNumerics.length;
    const status = issueCount > 0 ? 'warning' : 'pass';
    const message = issueCount > 0
        ? `Data validation issues found: ${missingDates.length} missing dates, ${invalidYears.length} invalid years/dates, ${invalidNumerics.length} invalid numerics`
        : 'All data types valid';

    return {
        name: 'Data Validation',
        status: status,
        message: message,
        details: {
            title: 'Data Type & Content Validation',
            type: 'dataValidation',
            missingDates: missingDates,
            invalidYears: invalidYears,
            invalidNumerics: invalidNumerics
        }
    };
}
// True when the value parses to a real calendar date; empty/null/invalid
// inputs return false.
function isValidDate(dateString) {
    if (!dateString) {
        return false;
    }
    const parsed = new Date(dateString);
    return parsed instanceof Date && !Number.isNaN(parsed.getTime());
}
// Render the validation outcome: one traffic-light card per check, followed
// by a detailed section per check type. Safe to call repeatedly -- previous
// output is cleared first.
function displayResults(results) {
    const trafficLight = document.getElementById('trafficLight');
    const detailsSection = document.getElementById('detailsSection');
    const resultsSection = document.getElementById('resultsSection');
    // Clear output from any previous run.
    trafficLight.innerHTML = '';
    detailsSection.innerHTML = '';
    // Display traffic lights
    results.checks.forEach(check => {
        const light = document.createElement('div');
        light.className = `check-item ${check.status}`;
        light.innerHTML = `
            <span class="light ${check.status === 'pass' ? 'green' : check.status === 'warning' ? 'yellow' : 'red'}"></span>
            <div>
                <strong>${check.name}</strong>
                <div style="font-size: 13px; margin-top: 4px;">${check.message}</div>
            </div>
        `;
        trafficLight.appendChild(light);
    });
    // Display details -- each detail object routes to its renderer by type.
    results.details.forEach(detail => {
        if (detail.type === 'columns') {
            detailsSection.appendChild(createColumnDetails(detail));
        } else if (detail.type === 'properties') {
            detailsSection.appendChild(createPropertiesDetails(detail));
        } else if (detail.type === 'crs') {
            detailsSection.appendChild(createCRSDetails(detail));
        } else if (detail.type === 'fieldMatching') {
            detailsSection.appendChild(createFieldMatchingDetails(detail));
        } else if (detail.type === 'dataValidation') {
            detailsSection.appendChild(createDataValidationDetails(detail));
        }
    });
    resultsSection.classList.add('show');
}
// Build the DOM section detailing the Excel column check: required and
// optional column badges, then error/warning/success callouts.
// NOTE(review): several labels below start with a stray leading space
// (e.g. "<strong> Missing Required Columns:") -- an icon character may have
// been stripped at some point; confirm against the original asset.
function createColumnDetails(detail) {
    const section = document.createElement('div');
    section.innerHTML = `<h3>${detail.title}</h3>`;
    // Required columns
    section.innerHTML += `
        <div style="margin-bottom: 15px;">
            <strong>Required Columns:</strong>
            <div class="field-list" style="margin-top: 8px;">
                ${detail.required.map(col => `<div class="field-badge" style="border-left-color: #28a745; background: #d4edda; color: #155724;">${col}</div>`).join('')}
            </div>
        </div>
    `;
    // Optional columns
    if (detail.optional && detail.optional.length > 0) {
        section.innerHTML += `
            <div style="margin-bottom: 15px;">
                <strong>Optional Columns (not required):</strong>
                <div class="field-list" style="margin-top: 8px;">
                    ${detail.optional.map(col => `<div class="field-badge" style="border-left-color: #17a2b8; background: #d1ecf1; color: #0c5460;">${col}</div>`).join('')}
                </div>
                <small style="display: block; margin-top: 8px;"> <em>${detail.optional.join(', ')} ${detail.optional.length === 1 ? 'is' : 'are'} calculated in the system or optional</em></small>
            </div>
        `;
    }
    if (detail.missing.length > 0) {
        section.innerHTML += `
            <div class="message-box error">
                <strong> Missing Required Columns:</strong><br>${detail.missing.join(', ')}
            </div>
        `;
    }
    if (detail.extra.length > 0) {
        section.innerHTML += `
            <div class="message-box warning">
                <strong> Extra Columns (will be ignored):</strong><br>${detail.extra.join(', ')}
            </div>
        `;
    }
    // All clear: no missing and no extra columns.
    if (detail.missing.length === 0 && detail.extra.length === 0) {
        section.innerHTML += `
            <div class="message-box success">
                <strong> Perfect!</strong> All required columns present.
            </div>
        `;
    }
    return section;
}
// Build the DOM section detailing the GeoJSON property check: a table of
// features with missing properties, a warning for extras, or a success
// callout. Reads the global geojsonData for the feature count.
function createPropertiesDetails(detail) {
    const section = document.createElement('div');
    section.innerHTML = `<h3>${detail.title}</h3>`;
    // Short-circuit: the validator found no features at all.
    if (detail.error) {
        section.innerHTML += `<div class="message-box error">${detail.error}</div>`;
        return section;
    }
    if (detail.missingInFeatures && detail.missingInFeatures.length > 0) {
        section.innerHTML += `
            <div class="message-box error">
                <strong> Missing Properties in Features:</strong>
                <table>
                    <tr><th>Feature #</th><th>Field Name</th><th>Missing Property</th></tr>
                    ${detail.missingInFeatures.map(m => `<tr><td>${m.feature}</td><td>${m.field}</td><td>${m.property}</td></tr>`).join('')}
                </table>
            </div>
        `;
    }
    if (detail.extra && detail.extra.length > 0) {
        section.innerHTML += `
            <div class="message-box warning">
                <strong> Extra Properties (redundant):</strong><br>${detail.extra.join(', ')}<br>
                <small>These will be ignored during processing.</small>
            </div>
        `;
    }
    // All clear: nothing missing and nothing extra.
    if ((!detail.missingInFeatures || detail.missingInFeatures.length === 0) && (!detail.extra || detail.extra.length === 0)) {
        section.innerHTML += `
            <div class="message-box success">
                <strong> Perfect!</strong> All required properties present in all ${geojsonData.features.length} features.
            </div>
        `;
    }
    return section;
}
// Render the "CRS Check" result card.
// `detail` carries: title, detected (string, 'Not specified' sentinel),
// expected, description, crsObject? — shape inferred from usage here.
function createCRSDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
// No CRS declared in the file at all
if (detail.detected === 'Not specified') {
section.innerHTML += `
<div class="message-box error">
<strong> CRS Not Specified</strong><br>
Expected: <code>${detail.expected}</code><br>
${detail.description}
</div>
`;
// Accept EPSG:32736 or any string mentioning both "UTM" and "36".
// NOTE(review): the 'UTM' + '36' substring test is loose (would also match
// e.g. zone 36N vs 36S or other strings containing "36") — verify intent.
} else if (detail.detected.includes('32736') || (detail.detected.includes('UTM') && detail.detected.includes('36'))) {
section.innerHTML += `
<div class="message-box success">
<strong> Correct CRS</strong><br>
Detected: <code>${detail.detected}</code><br>
${detail.description}
</div>
`;
// Any other declared CRS is treated as wrong
} else {
section.innerHTML += `
<div class="message-box error">
<strong> Wrong CRS</strong><br>
Expected: <code>${detail.expected}</code><br>
Detected: <code>${detail.detected}</code><br>
${detail.description}
</div>
`;
}
// Raw CRS object dump for debugging, when the validator supplied one
if (detail.crsObject) {
section.innerHTML += `
<div style="margin-top: 15px; padding: 10px; background: #f8f9ff; border-radius: 4px; font-size: 12px;">
<strong>CRS Details:</strong><br>
<code>${JSON.stringify(detail.crsObject, null, 2)}</code>
</div>
`;
}
return section;
}
// Render the "Field Matching" card comparing field names between the Excel
// harvest sheet and the GeoJSON boundaries.
// `detail` carries: title, excelOnly[], geojsonOnly[], matching[],
// matchingTable[] ({field, excel, geojson, status}) — inferred from usage here.
function createFieldMatchingDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
// Fields with harvest data but no boundary polygon
if (detail.excelOnly.length > 0) {
section.innerHTML += `
<div class="message-box error">
<strong> Fields in Excel but NOT in GeoJSON (${detail.excelOnly.length}):</strong>
<div class="field-list">
${detail.excelOnly.map(f => `<div class="field-badge missing">${f}</div>`).join('')}
</div>
<small style="display: block; margin-top: 10px;">These fields exist in your harvest data but have no boundaries defined in the GeoJSON.</small>
</div>
`;
}
// Fields with a boundary polygon but no harvest data
if (detail.geojsonOnly.length > 0) {
section.innerHTML += `
<div class="message-box error">
<strong> Fields in GeoJSON but NOT in Excel (${detail.geojsonOnly.length}):</strong>
<div class="field-list">
${detail.geojsonOnly.map(f => `<div class="field-badge extra">${f}</div>`).join('')}
</div>
<small style="display: block; margin-top: 10px;">These fields have boundaries defined but no data in your harvest spreadsheet.</small>
</div>
`;
}
// Fields present in both sources
if (detail.matching.length > 0) {
section.innerHTML += `
<div class="message-box success">
<strong> Matching Fields (${detail.matching.length}):</strong>
<div class="field-list">
${detail.matching.map(f => `<div class="field-badge" style="border-left-color: #28a745; background: #d4edda; color: #155724;">${f}</div>`).join('')}
</div>
</div>
`;
}
// Full matching table — one row per field across both sources;
// `row.status` doubles as the CSS class ('match' expected for green rows).
section.innerHTML += `
<div style="margin-top: 20px;">
<strong>Complete Field Summary:</strong>
<table>
<tr>
<th>Field Name</th>
<th>In Excel</th>
<th>In GeoJSON</th>
<th>Status</th>
</tr>
${detail.matchingTable.map(row => `
<tr>
<td><strong>${row.field}</strong></td>
<td>${row.excel ? '✓' : '✗'}</td>
<td>${row.geojson ? '✓' : '✗'}</td>
<td><span class="${row.status}">${row.status === 'match' ? '🟢 Match' : '🔴 Mismatch'}</span></td>
</tr>
`).join('')}
</table>
</div>
`;
return section;
}
// Render the "Data Validation" card summarising cell-level problems found in
// the Excel sheet. `detail` carries: title, missingDates[] ({row, field}),
// invalidYears[] and invalidNumerics[] ({row, field, column, value}) —
// shape inferred from usage here; confirm against the validator.
function createDataValidationDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
// Rows missing a season_start date
if (detail.missingDates.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong> Missing season_start dates (${detail.missingDates.length}):</strong>
<table style="font-size: 13px;">
<tr><th>Row #</th><th>Field Name</th></tr>
${detail.missingDates.map(m => `<tr><td>${m.row}</td><td>${m.field}</td></tr>`).join('')}
</table>
</div>
`;
}
// Cells whose date/year value failed to parse or is out of range
if (detail.invalidYears.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong> Invalid dates/years (${detail.invalidYears.length}):</strong>
<table style="font-size: 13px;">
<tr><th>Row #</th><th>Field Name</th><th>Column</th><th>Value</th></tr>
${detail.invalidYears.map(m => `<tr><td>${m.row}</td><td>${m.field}</td><td>${m.column}</td><td>${m.value}</td></tr>`).join('')}
</table>
</div>
`;
}
// Cells expected to be numeric that are not
if (detail.invalidNumerics.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong> Invalid numeric values (${detail.invalidNumerics.length}):</strong>
<table style="font-size: 13px;">
<tr><th>Row #</th><th>Field Name</th><th>Column</th><th>Value</th></tr>
${detail.invalidNumerics.map(m => `<tr><td>${m.row}</td><td>${m.field}</td><td>${m.column}</td><td>${m.value}</td></tr>`).join('')}
</table>
</div>
`;
}
// All-clear when no category reported a problem
if (detail.missingDates.length === 0 && detail.invalidYears.length === 0 && detail.invalidNumerics.length === 0) {
section.innerHTML += `
<div class="message-box success">
<strong> All data types valid!</strong> No missing dates or invalid values detected.
</div>
`;
}
return section;
}
// Surface a blocking validation error for one of the uploaded files.
// `fileType` labels the source (e.g. the Excel or GeoJSON upload);
// `message` is the human-readable reason.
function showError(fileType, message) {
    const text = `${fileType} Error: ${message}`;
    alert(text);
}

View file

@ -1,50 +0,0 @@
# debug_mosaic.R
# Quick diagnostic: inspect a weekly CI mosaic (layers, extent, band-5 CI
# range) and dry-run a CI extraction against the first field polygon in
# pivot.geojson, printing CRS info and value counts along the way.
library(terra)
library(sf)
# Check the mosaic
mosaic <- terra::rast('laravel_app/storage/app/angata/weekly_mosaic/week_52_2025.tif')
cat('Mosaic info:\n')
cat(' Layers:', terra::nlyr(mosaic), '\n')
ext_vals <- c(terra::ext(mosaic)$xmin, terra::ext(mosaic)$xmax, terra::ext(mosaic)$ymin, terra::ext(mosaic)$ymax)
cat(' Extent:', paste(round(ext_vals, 2), collapse=', '), '\n')
# Extract band 5 — CI is assumed to be stored as the 5th mosaic band
# (TODO confirm against the mosaic-creation script)
band5 <- mosaic[[5]]
cat('Band 5 (CI):\n')
min_val <- as.numeric(terra::global(band5, 'min', na.rm=TRUE))
max_val <- as.numeric(terra::global(band5, 'max', na.rm=TRUE))
cat(' Min:', round(min_val, 3), '\n')
cat(' Max:', round(max_val, 3), '\n')
# Check field boundaries
geojson_path <- 'laravel_app/storage/app/angata/Data/pivot.geojson'
fields <- sf::st_read(geojson_path, quiet=TRUE)
cat('\nTesting extraction on first field:\n')
# Get first field
field_1 <- fields[1, ]
field_id <- field_1$field
cat(' Field ID:', field_id, '\n')
# Try extraction; any failure is reported rather than aborting the script
tryCatch({
  # terra::vect() accepts sf objects directly — avoids the detour through
  # sf::as_Spatial(), which depends on the retired sp package.
  field_geom <- terra::vect(field_1)
  cat(' Geometry CRS:', terra::crs(field_geom), '\n')
  cat(' Raster CRS:', terra::crs(band5), '\n')
  result <- terra::extract(band5, field_geom)
  cat(' Extract result rows:', nrow(result), '\n')
  cat(' Extract result cols:', ncol(result), '\n')
  if (nrow(result) > 0) {
    # Column 1 is the polygon ID added by terra::extract(); values are in col 2
    vals <- result[, 2]
    cat(' Values extracted:', length(vals), '\n')
    cat(' Non-NA values:', sum(!is.na(vals)), '\n')
    if (sum(!is.na(vals)) > 0) {
      cat(' Range of non-NA values:', min(vals, na.rm=TRUE), 'to', max(vals, na.rm=TRUE), '\n')
    }
  }
}, error = function(e) {
  cat(' ERROR:', e$message, '\n')
})

View file

@ -1,15 +0,0 @@
# Ad-hoc inspection of the saved week-39 KPI artifacts for the "esa" project:
# prints the per-field details, the field-level TCH forecast, and the
# TCH summary table.
kpi_dir <- "laravel_app/storage/app/esa/reports/kpis"

field_details <- readRDS(file.path(kpi_dir, "esa_field_details_week39.rds"))
summary_tables <- readRDS(file.path(kpi_dir, "esa_kpi_summary_tables_week39.rds"))

cat("=== FIELD DETAILS ===\n")
print(head(field_details, 20))
cat("\nTotal rows:", nrow(field_details), "\n\n")

cat("=== TCH FORECASTED FIELD RESULTS ===\n")
tch_results <- readRDS(file.path(kpi_dir, "field_level", "tch_forecasted_field_results_week39.rds"))
print(tch_results)
cat("\nNumber of predictions:", nrow(tch_results), "\n\n")

cat("=== SUMMARY TABLES ===\n")
print(summary_tables$tch_forecasted)

View file

@ -1,48 +0,0 @@
# Generate Interactive SAR Report
# ===============================
# Renders r_app/SAR_exploration_report.Rmd to an HTML file and, on Windows,
# opens it in the default browser. Must be run from the repo root so the
# relative paths below resolve.
cat("Generating interactive SAR exploration report...\n")
# Ensure rmarkdown is installed, then attach it.
# requireNamespace() is the correct availability test; the old
# `if (!require(...))` form attaches as a side effect and only returns
# FALSE on failure, which lintr rightly flags.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}
library(rmarkdown)
# Guard: relative paths below assume the main project directory
if (basename(getwd()) != "smartcane") {
  stop("Please run this from the main smartcane directory")
}
# Render the report
report_file <- "r_app/SAR_exploration_report.Rmd"
output_file <- "output/SAR_exploration_report.html"
cat("Rendering report:", report_file, "\n")
cat("Output file:", output_file, "\n")
# Render with error handling — a failed render prints the condition
# message instead of aborting the calling session
tryCatch({
  rmarkdown::render(
    input = report_file,
    output_file = output_file,
    output_format = "html_document",
    quiet = FALSE
  )
  cat("\n✓ Report generated successfully!\n")
  cat("Open", output_file, "in your browser to view the interactive maps.\n")
  # Try to open in browser (Windows)
  if (.Platform$OS.type == "windows") {
    shell.exec(normalizePath(output_file))
  }
}, error = function(e) {
  cat("✗ Error generating report:\n")
  cat(conditionMessage(e), "\n")
  # Try with minimal content first
  cat("\nTrying minimal report generation...\n")
  cat("Check the console output above for specific errors.\n")
})

32983
get-pip.py

File diff suppressed because it is too large Load diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

View file

@ -1,27 +0,0 @@
# Quick script to inspect the actual band structure of 8-band imagery:
# prints per-band name, sample values, and min/max/mean, then (if present)
# the unique values of band 9 to see whether it is a mask/quality band.
library(terra)
sample_tif <- "laravel_app/storage/app/esa/merged_tif_8b/2025-01-15.tif"
r <- rast(sample_tif)
cat("Number of bands:", nlyr(r), "\n\n")
# Check each band's values (seq_len() is safe if nlyr() were ever 0)
for (i in seq_len(nlyr(r))) {
  band <- r[[i]]
  vals <- values(band, mat=FALSE)
  # head() avoids NA padding when fewer than 100 non-NA pixels exist
  # (the old `[1:100]` indexing padded short vectors with NAs)
  vals_sample <- head(vals[!is.na(vals)], 100)
  cat("Band", i, ":\n")
  cat(" Name:", names(r)[i], "\n")
  cat(" Sample values:", paste(head(vals_sample, 10), collapse = ", "), "\n")
  cat(" Min:", min(vals, na.rm=TRUE), "\n")
  cat(" Max:", max(vals, na.rm=TRUE), "\n")
  cat(" Mean:", mean(vals, na.rm=TRUE), "\n\n")
}
# Check if band 9 is actually a mask or quality band.
# Guarded: a true 8-band product has no band 9 and r[[9]] would error.
if (nlyr(r) >= 9) {
  cat("\nBand 9 unique values (first 50):\n")
  band9_vals <- values(r[[9]], mat=FALSE)
  print(head(unique(band9_vals[!is.na(band9_vals)]), 50))
}
View file

@ -1,28 +0,0 @@
# Quick script to inspect band structure of merged_tif_8b files:
# prints per-band name, value range, mean, pixel count, and sample values.
library(terra)
library(here)
# Pick one file to inspect
test_file <- here("laravel_app/storage/app/esa/merged_tif_8b/2025-11-15.tif")
cat("=== INSPECTING BAND STRUCTURE ===\n\n")
cat(sprintf("File: %s\n\n", basename(test_file)))
# Load raster
rast_obj <- rast(test_file)
cat(sprintf("Number of bands: %d\n\n", nlyr(rast_obj)))
# Check each band (seq_len() is safe if nlyr() were ever 0)
for (i in seq_len(nlyr(rast_obj))) {
  band <- rast_obj[[i]]
  band_vals <- values(band, mat = FALSE)
  band_vals <- band_vals[!is.na(band_vals)]
  cat(sprintf("Band %d:\n", i))
  cat(sprintf(" Name: %s\n", names(band)))
  if (length(band_vals) > 0) {
    # Guarded: min()/max() on an empty vector return Inf/-Inf with warnings
    cat(sprintf(" Values range: %.2f to %.2f\n", min(band_vals), max(band_vals)))
    cat(sprintf(" Mean: %.2f\n", mean(band_vals)))
  } else {
    cat(" All pixels are NA\n")
  }
  cat(sprintf(" Non-NA pixels: %d\n", length(band_vals)))
  cat(sprintf(" Sample values: %s\n\n", paste(head(band_vals, 10), collapse = ", ")))
}

View file

@ -1,314 +0,0 @@
R version 4.4.3 (2025-02-28 ucrt) -- "Trophy Case"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
- Project 'C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane_v2/smartcane' loaded. [renv 1.1.4]
> # 09_CALCULATE_KPIS.R
> # ===================
> # This script calculates 6 Key Performance Indicators (KPIs) for sugarcane monitoring:
> # 1. Field Uniformity Summary
> # 2. Farm-wide Area Change Summary
> # 3. TCH Forecasted
> # 4. Growth Decline Index
> # 5. Weed Presence Score
> # 6. Gap Filling Score (placeholder)
> #
> # Usage: Rscript 09_calculate_kpis.R [end_date] [offset] [project_dir]
> # - end_date: End date for KPI calculation (YYYY-MM-DD format), default: today
> # - offset: Number of days to look back (not currently used for KPIs, but for consistency)
> # - project_dir: Project directory name (e.g., "aura", "esa")
>
> # 1. Load required libraries
> # -------------------------
> suppressPackageStartupMessages({
+ library(here)
+ library(sf)
+ library(terra)
+ library(dplyr)
+ library(tidyr)
+ library(lubridate)
+ library(readr)
+ library(caret)
+ library(CAST)
+ library(randomForest)
+ })
>
> # 2. Main function
> # --------------
> main <- function() {
+ # Process command line arguments
+ args <- commandArgs(trailingOnly = TRUE)
+
+ # Process end_date argument
+ if (length(args) >= 1 && !is.na(args[1])) {
+ end_date <- as.Date(args[1])
+ if (is.na(end_date)) {
+ warning("Invalid end_date provided. Using default (current date).")
+ end_date <- Sys.Date()
+ }
+ } else {
+ end_date <- Sys.Date()
+ }
+
+ # Process offset argument (for consistency with other scripts, not currently used)
+ if (length(args) >= 2 && !is.na(args[2])) {
+ offset <- as.numeric(args[2])
+ if (is.na(offset) || offset <= 0) {
+ warning("Invalid offset provided. Using default (7 days).")
+ offset <- 7
+ }
+ } else {
+ offset <- 7
+ }
+
+ # Process project_dir argument
+ if (length(args) >= 3 && !is.na(args[3])) {
+ project_dir <- as.character(args[3])
+ } else {
+ project_dir <- "esa" # Default project
+ }
+
+ # Make project_dir available globally so parameters_project.R can use it
+ assign("project_dir", project_dir, envir = .GlobalEnv)
+
+ # 3. Load utility functions and project configuration
+ # --------------------------------------------------
+
+ tryCatch({
+ source(here("r_app", "crop_messaging_utils.R"))
+ }, error = function(e) {
+ stop("Error loading crop_messaging_utils.R: ", e$message)
+ })
+
+ tryCatch({
+ source(here("r_app", "kpi_utils.R"))
+ }, error = function(e) {
+ stop("Error loading kpi_utils.R: ", e$message)
+ })
+
+ # Load project parameters (this sets up all directory paths and field boundaries)
+ tryCatch({
+ source(here("r_app", "parameters_project.R"))
+ }, error = function(e) {
+ stop("Error loading parameters_project.R: ", e$message)
+ })
+
+ # Load growth model utils if available (for yield prediction)
+ tryCatch({
+ source(here("r_app", "growth_model_utils.R"))
+ }, error = function(e) {
+ warning("growth_model_utils.R not found, yield prediction KPI will use placeholder data")
+ })
+
+ # Check if required variables exist
+ if (!exists("project_dir")) {
+ stop("project_dir must be set before running this script")
+ }
+
+ if (!exists("field_boundaries_sf") || is.null(field_boundaries_sf)) {
+ stop("Field boundaries not loaded. Check parameters_project.R initialization.")
+ }
+
+ # 4. Calculate all KPIs
+ # -------------------
+ output_dir <- file.path(reports_dir, "kpis")
+
+ kpi_results <- calculate_all_kpis(
+ report_date = end_date,
+ output_dir = output_dir,
+ field_boundaries_sf = field_boundaries_sf,
+ harvesting_data = harvesting_data,
+ cumulative_CI_vals_dir = cumulative_CI_vals_dir,
+ weekly_CI_mosaic = weekly_CI_mosaic,
+ reports_dir = reports_dir,
+ project_dir = project_dir
+ )
+
+ # 5. Print summary
+ # --------------
+ cat("\n=== KPI CALCULATION SUMMARY ===\n")
+ cat("Report Date:", as.character(kpi_results$metadata$report_date), "\n")
+ cat("Current Week:", kpi_results$metadata$current_week, "\n")
+ cat("Previous Week:", kpi_results$metadata$previous_week, "\n")
+ cat("Total Fields Analyzed:", kpi_results$metadata$total_fields, "\n")
+ cat("Calculation Time:", as.character(kpi_results$metadata$calculation_time), "\n")
+
+ cat("\nField Uniformity Summary:\n")
+ print(kpi_results$field_uniformity_summary)
+
+ cat("\nArea Change Summary:\n")
+ print(kpi_results$area_change)
+
+ cat("\nTCH Forecasted:\n")
+ print(kpi_results$tch_forecasted)
+
+ cat("\nGrowth Decline Index:\n")
+ print(kpi_results$growth_decline)
+
+ cat("\nWeed Presence Score:\n")
+ print(kpi_results$weed_presence)
+
+ cat("\nGap Filling Score:\n")
+ print(kpi_results$gap_filling)
+
+ cat("\n=== KPI CALCULATION COMPLETED ===\n")
+ }
>
> # 6. Script execution
> # -----------------
> if (sys.nframe() == 0) {
+ main()
+ }
[INFO] 2025-10-08 15:39:29 - Initializing project with directory: esa
[1] "model using cumulative_CI,DOY will be trained now..."
note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
+ Fold1: mtry=2
- Fold1: mtry=2
+ Fold2: mtry=2
- Fold2: mtry=2
+ Fold3: mtry=2
- Fold3: mtry=2
+ Fold4: mtry=2
- Fold4: mtry=2
+ Fold5: mtry=2
- Fold5: mtry=2
Aggregating results
Fitting final model on full training set
[1] "maximum number of models that still need to be trained: 3"
[1] "model using cumulative_CI,CI_per_day will be trained now..."
note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
+ Fold1: mtry=2
- Fold1: mtry=2
+ Fold2: mtry=2
- Fold2: mtry=2
+ Fold3: mtry=2
- Fold3: mtry=2
+ Fold4: mtry=2
- Fold4: mtry=2
+ Fold5: mtry=2
- Fold5: mtry=2
Aggregating results
Fitting final model on full training set
[1] "maximum number of models that still need to be trained: 2"
[1] "model using DOY,CI_per_day will be trained now..."
note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
+ Fold1: mtry=2
- Fold1: mtry=2
+ Fold2: mtry=2
- Fold2: mtry=2
+ Fold3: mtry=2
- Fold3: mtry=2
+ Fold4: mtry=2
- Fold4: mtry=2
+ Fold5: mtry=2
- Fold5: mtry=2
Aggregating results
Fitting final model on full training set
[1] "maximum number of models that still need to be trained: 1"
[1] "vars selected: cumulative_CI,DOY with RMSE 24.808"
[1] "model using additional variable CI_per_day will be trained now..."
note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .
+ Fold1: mtry=2
- Fold1: mtry=2
+ Fold1: mtry=3
- Fold1: mtry=3
+ Fold2: mtry=2
- Fold2: mtry=2
+ Fold2: mtry=3
- Fold2: mtry=3
+ Fold3: mtry=2
- Fold3: mtry=2
+ Fold3: mtry=3
- Fold3: mtry=3
+ Fold4: mtry=2
- Fold4: mtry=2
+ Fold4: mtry=3
- Fold4: mtry=3
+ Fold5: mtry=2
- Fold5: mtry=2
+ Fold5: mtry=3
- Fold5: mtry=3
Aggregating results
Selecting tuning parameters
Fitting mtry = 3 on full training set
[1] "maximum number of models that still need to be trained: 0"
[1] "vars selected: cumulative_CI,DOY with RMSE 24.808"
field_groups count value
75% Top 25% 3 96.2
50% Average 7 93.0
25% Lowest 25% 2 84.0
Total area forecasted 12 219.0
=== KPI CALCULATION SUMMARY ===
Report Date: 2025-10-08
Current Week: 40
Previous Week: 39
Total Fields Analyzed: 12
Calculation Time: 2025-10-08 15:39:34.583434
Field Uniformity Summary:
uniformity_level count percent
1 Excellent 0 0
2 Good 0 0
3 Moderate 0 0
4 Poor 0 0
Area Change Summary:
change_type hectares percent
1 Improving areas 0 0
2 Stable areas 0 0
3 Declining areas 0 0
4 Total area 0 100
TCH Forecasted:
field_groups count value
75% Top 25% 3 96.2
50% Average 7 93.0
25% Lowest 25% 2 84.0
Total area forecasted 12 219.0
Growth Decline Index:
risk_level count percent
1 High 0 0
2 Low 0 0
3 Moderate 0 0
4 Very-high 0 0
Weed Presence Score:
weed_risk_level field_count percent
1 Canopy closed - Low weed risk 4 33.3
2 High 0 0.0
3 Low 0 0.0
4 Moderate 0 0.0
Gap Filling Score:
# A tibble: 1 × 3
gap_level field_count percent
<chr> <int> <dbl>
1 <NA> 12 100
=== KPI CALCULATION COMPLETED ===
There were 50 or more warnings (use warnings() to see the first 50)
>
> proc.time()
user system elapsed
11.93 0.93 13.45

Binary file not shown.

View file

@ -1,447 +0,0 @@
# ============================================================================
# OPERATIONAL HARVEST PREDICTION
# Analyze current season growth curves to predict harvest timing
# ============================================================================
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(terra)
library(sf)
library(here)
library(ggplot2)
})
# Set project directory
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))
# ============================================================================
# STEP 1: LOAD DATA
# ============================================================================
cat("=== LOADING DATA ===\n\n")
# Load CI time series
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
time_series_daily <- ci_data_raw %>%
mutate(
date = as.Date(Date),
week = isoweek(date),
year = isoyear(date)
) %>%
select(
field_id = field,
date,
week,
year,
mean_ci = FitData
) %>%
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
arrange(field_id, date)
# Load harvest data
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(!is.na(season_end))
fields_with_ci <- unique(time_series_daily$field_id)
harvest_data_filtered <- harvest_data %>%
filter(field %in% fields_with_ci) %>%
arrange(field, season_end)
cat("Loaded CI data for", length(fields_with_ci), "fields\n")
cat("Loaded harvest data for", length(unique(harvest_data_filtered$field)), "fields\n\n")
# ============================================================================
# STEP 2: SEGMENT TIME SERIES BY SEASON
# ============================================================================
cat("=== SEGMENTING TIME SERIES INTO INDIVIDUAL SEASONS ===\n\n")
# For each field, create seasons based on harvest dates
# Season starts day after previous harvest, ends at next harvest
# Split one field's CI time series into per-season segments bounded by
# harvest dates. A season runs from the day after the previous harvest
# (or from the start of the CI record for the first season) up to and
# including the harvest date; the ongoing season after the last recorded
# harvest is appended with season_end_date = NA (the quantity being
# predicted downstream).
#
# Args:
#   field_name: field identifier, matched against ci_ts$field_id and
#     harvest_df$field.
#   ci_ts: daily CI observations with at least field_id and date columns
#     (see the time_series_daily construction earlier in this script).
#   harvest_df: harvest records with a season_end Date column.
#
# Returns a data frame of CI rows annotated with season_id, the season
# window, and day offsets — or NULL when the field has no harvest records
# or no CI rows fall inside any season.
create_seasons <- function(field_name, ci_ts, harvest_df) {
# Get CI data for this field
field_ci <- ci_ts %>%
filter(field_id == field_name) %>%
arrange(date)
# Get harvest dates for this field (season_id = chronological rank)
field_harvests <- harvest_df %>%
filter(field == field_name) %>%
arrange(season_end) %>%
mutate(season_id = row_number())
if (nrow(field_harvests) == 0) {
return(NULL)
}
# Create season segments, one per recorded harvest
seasons_list <- list()
for (i in 1:nrow(field_harvests)) {
# Season start: day after previous harvest (or start of data if first season)
if (i == 1) {
season_start <- min(field_ci$date)
} else {
season_start <- field_harvests$season_end[i-1] + 1
}
# Season end: current harvest date
season_end <- field_harvests$season_end[i]
# Extract CI data for this season
season_ci <- field_ci %>%
filter(date >= season_start, date <= season_end)
# A season with no CI rows leaves a NULL slot; bind_rows() drops NULLs.
if (nrow(season_ci) > 0) {
season_ci$season_id <- i
season_ci$season_start_date <- season_start
season_ci$season_end_date <- season_end
season_ci$days_in_season <- as.numeric(season_end - season_start)
season_ci$days_since_start <- as.numeric(season_ci$date - season_start)
season_ci$days_until_harvest <- as.numeric(season_end - season_ci$date)
seasons_list[[i]] <- season_ci
}
}
# Add current ongoing season (after last harvest)
if (nrow(field_harvests) > 0) {
last_harvest <- field_harvests$season_end[nrow(field_harvests)]
current_season_start <- last_harvest + 1
current_season_ci <- field_ci %>%
filter(date >= current_season_start)
if (nrow(current_season_ci) > 0) {
current_season_ci$season_id <- nrow(field_harvests) + 1
current_season_ci$season_start_date <- current_season_start
current_season_ci$season_end_date <- NA # Unknown - this is what we're predicting
current_season_ci$days_in_season <- NA
current_season_ci$days_since_start <- as.numeric(current_season_ci$date - current_season_start)
current_season_ci$days_until_harvest <- NA
seasons_list[[length(seasons_list) + 1]] <- current_season_ci
}
}
if (length(seasons_list) > 0) {
return(bind_rows(seasons_list))
} else {
return(NULL)
}
}
# Create segmented data for all fields
all_seasons <- lapply(fields_with_ci, function(field_name) {
seasons <- create_seasons(field_name, time_series_daily, harvest_data_filtered)
if (!is.null(seasons)) {
seasons$field_id <- field_name
}
return(seasons)
}) %>%
bind_rows()
cat("Created", nrow(all_seasons), "season-segmented observations\n")
cat("Total seasons:", length(unique(paste(all_seasons$field_id, all_seasons$season_id))), "\n\n")
# Summary by season
season_summary <- all_seasons %>%
group_by(field_id, season_id) %>%
summarise(
season_start = min(season_start_date),
season_end = max(season_end_date),
n_observations = n(),
days_duration = max(days_in_season, na.rm = TRUE),
max_ci = max(mean_ci, na.rm = TRUE),
is_current = all(is.na(season_end_date)),
.groups = "drop"
)
cat("Season summary:\n")
print(head(season_summary, 20))
# ============================================================================
# STEP 3: GROWTH CURVE ANALYSIS PER SEASON
# ============================================================================
cat("\n\n=== ANALYZING GROWTH CURVES PER SEASON ===\n\n")
# Smoothing function (Savitzky-Golay style moving average)
# Centered moving-average smoother for a CI time series.
#
# Args:
#   ci_values: numeric vector of (possibly NA) CI observations.
#   window: window width in observations; shrunk to the series length
#     (minimum 3) when the series is shorter than the window.
#
# Returns a numeric vector the same length as ci_values; each element is
# the mean of the values inside the edge-truncated window, ignoring NAs.
# Returns numeric(0) for empty input — the original `for (i in 1:n)` loop
# iterated over c(1, 0) when n == 0 and produced a spurious element.
smooth_ci <- function(ci_values, window = 15) {
  n <- length(ci_values)
  if (n < window) window <- max(3, n)
  half_window <- floor(window / 2)
  vapply(seq_len(n), function(i) {
    start_idx <- max(1, i - half_window)
    end_idx <- min(n, i + half_window)
    mean(ci_values[start_idx:end_idx], na.rm = TRUE)
  }, numeric(1))
}
# Detect peak and senescence
# Characterise one season's growth curve: smooth the CI series, locate the
# peak, estimate the post-peak senescence slope, and classify the current
# growth phase.
#
# Args:
#   season_df: rows of one (field, season) segment with at least mean_ci,
#     date, and days_since_start columns (as produced by create_seasons()).
#
# Returns a named list of curve metrics; mostly-NA with
# current_phase = "insufficient_data" when fewer than 20 observations.
# NOTE(review): the CI threshold 2.5 separating "post_peak_maturing" from
# "declining_harvest_approaching" is hard-coded here — confirm it matches
# the operational harvest threshold used elsewhere in the pipeline.
analyze_season_curve <- function(season_df) {
if (nrow(season_df) < 20) {
return(list(
peak_date = NA,
peak_ci = NA,
peak_days_since_start = NA,
senescence_start_date = NA,
senescence_rate = NA,
current_phase = "insufficient_data"
))
}
# Smooth the curve (moving average defined above in this script)
season_df$ci_smooth <- smooth_ci(season_df$mean_ci)
# Find peak of the smoothed curve
peak_idx <- which.max(season_df$ci_smooth)
peak_date <- season_df$date[peak_idx]
peak_ci <- season_df$ci_smooth[peak_idx]
peak_days <- season_df$days_since_start[peak_idx]
# Check if we're past the peak
last_date <- max(season_df$date)
is_post_peak <- last_date > peak_date
# Calculate senescence rate (slope after peak); requires at least a few
# post-peak observations for the linear fit to be meaningful
if (is_post_peak && peak_idx < nrow(season_df) - 5) {
post_peak_data <- season_df[(peak_idx):nrow(season_df), ]
# Fit linear model to post-peak data; slope is CI change per day
lm_post <- lm(ci_smooth ~ days_since_start, data = post_peak_data)
senescence_rate <- coef(lm_post)[2] # Slope
senescence_start <- peak_date
} else {
senescence_rate <- NA
senescence_start <- NA
}
# Determine current phase from the latest smoothed CI value
current_ci <- tail(season_df$ci_smooth, 1)
if (is.na(current_ci)) {
current_phase <- "unknown"
} else if (!is_post_peak) {
current_phase <- "growing"
} else if (current_ci > 2.5) {
current_phase <- "post_peak_maturing"
} else {
current_phase <- "declining_harvest_approaching"
}
return(list(
peak_date = peak_date,
peak_ci = peak_ci,
peak_days_since_start = peak_days,
senescence_start_date = senescence_start,
senescence_rate = senescence_rate,
current_phase = current_phase,
current_ci = current_ci,
last_obs_date = last_date
))
}
# Analyze each season
season_analysis <- all_seasons %>%
group_by(field_id, season_id) %>%
group_modify(~ {
analysis <- analyze_season_curve(.x)
as.data.frame(analysis)
}) %>%
ungroup()
# Merge with season summary
season_results <- season_summary %>%
left_join(season_analysis, by = c("field_id", "season_id"))
cat("Analyzed", nrow(season_results), "seasons\n\n")
# ============================================================================
# STEP 4: HARVEST TIMING PATTERNS (Historical Analysis)
# ============================================================================
cat("=== ANALYZING HISTORICAL HARVEST TIMING PATTERNS ===\n\n")
# Look at completed seasons only
historical_seasons <- season_results %>%
filter(!is_current) %>%
mutate(
days_peak_to_harvest = as.numeric(season_end - peak_date)
)
cat("Historical season statistics (completed harvests):\n\n")
cat("Average days from peak to harvest:\n")
peak_to_harvest_stats <- historical_seasons %>%
filter(!is.na(days_peak_to_harvest)) %>%
summarise(
mean_days = mean(days_peak_to_harvest, na.rm = TRUE),
median_days = median(days_peak_to_harvest, na.rm = TRUE),
sd_days = sd(days_peak_to_harvest, na.rm = TRUE),
min_days = min(days_peak_to_harvest, na.rm = TRUE),
max_days = max(days_peak_to_harvest, na.rm = TRUE)
)
print(peak_to_harvest_stats)
cat("\n\nPeak CI at harvest time:\n")
peak_ci_stats <- historical_seasons %>%
filter(!is.na(peak_ci)) %>%
summarise(
mean_peak_ci = mean(peak_ci, na.rm = TRUE),
median_peak_ci = median(peak_ci, na.rm = TRUE),
sd_peak_ci = sd(peak_ci, na.rm = TRUE)
)
print(peak_ci_stats)
cat("\n\nSenescence rate (CI decline per day after peak):\n")
senescence_stats <- historical_seasons %>%
filter(!is.na(senescence_rate), senescence_rate < 0) %>%
summarise(
mean_rate = mean(senescence_rate, na.rm = TRUE),
median_rate = median(senescence_rate, na.rm = TRUE),
sd_rate = sd(senescence_rate, na.rm = TRUE)
)
print(senescence_stats)
# ============================================================================
# STEP 5: CURRENT SEASON PREDICTIONS
# ============================================================================
cat("\n\n=== PREDICTING HARVEST FOR CURRENT ONGOING SEASONS ===\n\n")
# Get current seasons
current_seasons <- season_results %>%
filter(is_current) %>%
mutate(
# Use historical average to predict harvest
predicted_harvest_date = peak_date + peak_to_harvest_stats$mean_days,
days_until_predicted_harvest = as.numeric(predicted_harvest_date - last_obs_date),
weeks_until_predicted_harvest = days_until_predicted_harvest / 7
)
cat("Current ongoing seasons (ready for harvest prediction):\n\n")
current_predictions <- current_seasons %>%
mutate(
days_since_peak = as.numeric(last_obs_date - peak_date)
) %>%
select(
field_id,
season_id,
last_harvest = season_start,
last_observation = last_obs_date,
current_ci,
current_phase,
peak_date,
peak_ci,
days_since_peak,
predicted_harvest = predicted_harvest_date,
weeks_until_harvest = weeks_until_predicted_harvest
) %>%
arrange(weeks_until_harvest)
print(current_predictions)
cat("\n\nHarvest readiness assessment:\n\n")
harvest_alerts <- current_predictions %>%
mutate(
alert = case_when(
current_ci < 2.5 & current_phase == "declining_harvest_approaching" ~ "🚨 HARVEST IMMINENT (CI < 2.5)",
current_ci < 3.0 & weeks_until_harvest < 2 ~ "⚠️ HARVEST WITHIN 2 WEEKS",
weeks_until_harvest < 4 ~ "💡 HARVEST WITHIN 1 MONTH",
current_phase == "growing" ~ "✅ STILL GROWING",
TRUE ~ "📊 MONITORING"
)
) %>%
select(field_id, current_ci, current_phase, predicted_harvest, alert)
print(harvest_alerts)
# ============================================================================
# STEP 6: VALIDATION OF PREDICTION METHOD
# ============================================================================
cat("\n\n=== VALIDATING PREDICTION METHOD ON HISTORICAL DATA ===\n\n")
# For each historical season, predict when harvest would occur using only data up to peak
validation_results <- historical_seasons %>%
filter(!is.na(peak_date), !is.na(season_end)) %>%
mutate(
predicted_harvest = peak_date + peak_to_harvest_stats$mean_days,
actual_harvest = season_end,
prediction_error_days = as.numeric(predicted_harvest - actual_harvest),
prediction_error_weeks = prediction_error_days / 7
)
cat("Prediction accuracy metrics:\n\n")
accuracy_metrics <- validation_results %>%
summarise(
n_predictions = n(),
mean_error_days = mean(abs(prediction_error_days), na.rm = TRUE),
median_error_days = median(abs(prediction_error_days), na.rm = TRUE),
rmse_days = sqrt(mean(prediction_error_days^2, na.rm = TRUE)),
within_2_weeks = sum(abs(prediction_error_weeks) <= 2, na.rm = TRUE),
pct_within_2_weeks = 100 * sum(abs(prediction_error_weeks) <= 2, na.rm = TRUE) / n()
)
print(accuracy_metrics)
cat("\n\nSample predictions vs actual:\n")
print(validation_results %>%
select(field_id, season_id, peak_date, predicted_harvest, actual_harvest,
prediction_error_weeks) %>%
head(15))
# ============================================================================
# SUMMARY
# ============================================================================
cat("\n\n=== OPERATIONAL HARVEST PREDICTION SUMMARY ===\n\n")
cat("METHODOLOGY:\n")
cat("1. Segment CI time series by harvest dates (each season = planting to harvest)\n")
cat("2. Smooth CI data to identify peak (maturity point)\n")
cat("3. Historical pattern: Average", round(peak_to_harvest_stats$mean_days), "days from peak to harvest\n")
cat("4. Current season prediction: Peak date +", round(peak_to_harvest_stats$mean_days), "days\n\n")
cat("PREDICTION ACCURACY (Historical Validation):\n")
cat(" - Mean absolute error:", round(accuracy_metrics$mean_error_days), "days\n")
cat(" - RMSE:", round(accuracy_metrics$rmse_days), "days\n")
cat(" - Accuracy within 2 weeks:", round(accuracy_metrics$pct_within_2_weeks), "%\n\n")
cat("HARVEST TRIGGER (Operational Rule):\n")
cat(" - Primary: CI drops below 2.5 while in declining phase\n")
cat(" - Secondary: Predicted harvest date approaches (±2 weeks)\n")
cat(" - Confirmation: Visual inspection when both conditions met\n\n")
cat("FIELDS READY FOR HARVEST NOW:\n")
ready_now <- harvest_alerts %>%
filter(grepl("IMMINENT|WITHIN 2 WEEKS", alert))
if (nrow(ready_now) > 0) {
print(ready_now)
} else {
cat(" No fields at immediate harvest stage\n")
}
cat("\n=== ANALYSIS COMPLETE ===\n")

View file

@ -1,72 +0,0 @@
# SmartCane - Git Push to Bitbucket
# Run this script to commit and push all changes
# Step 1: Check current status
Write-Host "=== Current Git Status ===" -ForegroundColor Cyan
git status
# Step 2: Add all new and modified files
Write-Host "`n=== Adding Files ===" -ForegroundColor Cyan
git add -A
# Step 3: Show what will be committed
Write-Host "`n=== Files to be committed ===" -ForegroundColor Cyan
git status
# Step 4: Commit with descriptive message
Write-Host "`n=== Committing Changes ===" -ForegroundColor Cyan
$commitMessage = @"
Add KPI reporting system and deployment documentation
Major Changes:
- NEW: Scripts 09 & 10 for KPI calculation and enhanced reporting
- NEW: Shell script wrappers (01-10) for easier execution
- NEW: R packages flextable and officer for enhanced Word reports
- NEW: DEPLOYMENT_README.md with complete deployment guide
- RENAMED: Numbered R scripts (02, 03, 04) for clarity
- REMOVED: Old package management scripts (using renv only)
- UPDATED: Workflow now uses scripts 09->10 instead of 05
Files Changed: 90+ files
New Packages: flextable, officer
New Scripts: 09_run_calculate_kpis.sh, 10_run_kpi_report.sh
Documentation: DEPLOYMENT_README.md, EMAIL_TO_ADMIN.txt
See DEPLOYMENT_README.md for full deployment instructions.
"@
git commit -m $commitMessage
# Step 5: Push to Bitbucket
Write-Host "`n=== Ready to Push ===" -ForegroundColor Yellow
Write-Host "Current branch: " -NoNewline
git branch --show-current
Write-Host "`nDo you want to push to Bitbucket? (Y/N): " -ForegroundColor Yellow -NoNewline
$confirmation = Read-Host
if ($confirmation -eq 'Y' -or $confirmation -eq 'y') {
Write-Host "`n=== Pushing to Bitbucket ===" -ForegroundColor Green
# Get current branch name
$branch = git branch --show-current
# Push to origin
git push origin $branch
Write-Host "`n[SUCCESS] Pushed to Bitbucket!" -ForegroundColor Green
Write-Host "`nNext steps:" -ForegroundColor Cyan
Write-Host "1. Send EMAIL_TO_ADMIN.txt to your administrator"
Write-Host "2. Ensure they have access to the Bitbucket repository"
Write-Host "3. Monitor deployment and test on Linux server"
Write-Host "4. Update Laravel UI with Script 10 parameters"
} else {
Write-Host "`n[CANCELLED] Push cancelled. Run 'git push origin $(git branch --show-current)' when ready." -ForegroundColor Yellow
}
Write-Host "`n=== Summary ===" -ForegroundColor Cyan
Write-Host "Deployment guide: DEPLOYMENT_README.md"
Write-Host "Admin email: EMAIL_TO_ADMIN.txt"
Write-Host "New scripts: 09_run_calculate_kpis.sh, 10_run_kpi_report.sh"
Write-Host "New packages: flextable, officer"

View file

@ -51,6 +51,7 @@ from harvest_date_pred_utils import (
def main(): def main():
# Get project name from command line or use default # Get project name from command line or use default
project_name = sys.argv[1] if len(sys.argv) > 1 else "angata" project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
field_filter = sys.argv[2] if len(sys.argv) > 2 else None # Optional: test single field
# Construct paths # Construct paths
base_storage = Path("../laravel_app/storage/app") / project_name / "Data" base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
@ -71,6 +72,8 @@ def main():
print("="*80) print("="*80)
print(f"HARVEST DATE PREDICTION - LSTM MODEL 307 ({project_name})") print(f"HARVEST DATE PREDICTION - LSTM MODEL 307 ({project_name})")
if field_filter:
print(f"TEST MODE: Single field ({field_filter})")
print("="*80) print("="*80)
# [1/4] Load model # [1/4] Load model
@ -82,14 +85,31 @@ def main():
# [2/4] Load and prepare CI data # [2/4] Load and prepare CI data
print("\n[2/4] Loading CI data...") print("\n[2/4] Loading CI data...")
print(f" From: {CI_DATA_FILE}") print(f" From: {CI_DATA_FILE}")
ci_data = pd.read_csv(CI_DATA_FILE) ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str}) # Force field as string
ci_data['Date'] = pd.to_datetime(ci_data['Date']) ci_data['Date'] = pd.to_datetime(ci_data['Date'])
print(f" Loaded {len(ci_data)} daily rows across {ci_data['field'].nunique()} fields") print(f" Loaded {len(ci_data)} daily rows across {ci_data['field'].nunique()} fields")
print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}") print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}")
# Optional: Filter to single field for testing
if field_filter:
field_filter = str(field_filter) # Ensure field_filter is string
ci_data_filtered = ci_data[ci_data['field'] == field_filter]
if len(ci_data_filtered) == 0:
print(f"\n✗ ERROR: No data found for field '{field_filter}'")
available_fields = sorted(ci_data['field'].unique())
print(f" Available fields ({len(available_fields)}): {', '.join(available_fields[:10])}")
if len(available_fields) > 10:
print(f" ... and {len(available_fields) - 10} more")
return
ci_data = ci_data_filtered
print(f" ✓ Filtered to single field: {field_filter}")
print(f" Data points: {len(ci_data)} days")
# [3/4] Run model predictions with two-step detection # [3/4] Run model predictions with two-step detection
print("\n[3/4] Running two-step harvest detection...") print("\n[3/4] Running two-step harvest detection...")
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device) print(" (Using threshold=0.45, consecutive_days=2 - tuned for Model 307 output)")
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
phase1_threshold=0.45, phase1_consecutive=2)
# Build and export # Build and export
print("\nBuilding production harvest table...") print("\nBuilding production harvest table...")
@ -102,10 +122,11 @@ def main():
print(f" Input: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/") print(f" Input: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/") print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
print(f"\nColumn structure:") print(f"\nColumn structure:")
print(f" field, sub_field, season, year, season_start_date, season_end_date, phase1_harvest_date") print(f" field, sub_field, season, season_start_date, season_end_date, phase2_harvest_date")
print(f"\nNext steps:") print(f"\nNext steps:")
print(f" 1. Review baseline predictions in harvest_production_export.xlsx") print(f" 1. Review predictions in harvest_production_export.xlsx")
print(f" 2. Run weekly monitoring: python 02_harvest_imminent_weekly.py {project_name}") print(f" 2. Run weekly monitoring: python 31_harvest_imminent_weekly.py {project_name}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -271,11 +271,17 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
return df return df
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device): def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
threshold=0.45, consecutive_days=2):
""" """
Phase 1: Growing window detection with threshold crossing. Phase 1: Growing window detection with threshold crossing.
Expand window day-by-day, check last timestep's detected_prob. Expand window day-by-day, check last timestep's detected_prob.
When 3 consecutive days have prob > 0.5, harvest detected. When N consecutive days have prob > threshold, harvest detected.
Args:
threshold (float): Probability threshold (default 0.45, tuned for Model 307)
consecutive_days (int): Required consecutive days above threshold (default 2, reduced from 3 for robustness)
Returns list of (harvest_date, harvest_idx) tuples. Returns list of (harvest_date, harvest_idx) tuples.
""" """
harvest_dates = [] harvest_dates = []
@ -306,18 +312,18 @@ def run_phase1_growing_window(field_data, model, config, scalers, ci_column, dev
# Check LAST timestep # Check LAST timestep
last_prob = detected_probs[-1] last_prob = detected_probs[-1]
if last_prob > 0.5: if last_prob > threshold:
consecutive_above_threshold += 1 consecutive_above_threshold += 1
else: else:
consecutive_above_threshold = 0 consecutive_above_threshold = 0
# Harvest detected: 3 consecutive days above threshold # Harvest detected: N consecutive days above threshold
if consecutive_above_threshold >= 3: if consecutive_above_threshold >= consecutive_days:
harvest_date = field_data.iloc[current_pos + window_end - 3]['Date'] harvest_date = field_data.iloc[current_pos + window_end - consecutive_days]['Date']
harvest_dates.append((harvest_date, current_pos + window_end - 3)) harvest_dates.append((harvest_date, current_pos + window_end - consecutive_days))
# Reset to next day after harvest # Reset to next day after harvest
current_pos = current_pos + window_end - 2 current_pos = current_pos + window_end - consecutive_days + 1
break break
except Exception: except Exception:
@ -391,12 +397,17 @@ def run_phase2_refinement(field_data, phase1_harvests, model, config, scalers, c
return refined_harvests return refined_harvests
def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=None): def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=None,
phase1_threshold=0.45, phase1_consecutive=2):
""" """
Two-step harvest detection for each field: Two-step harvest detection for each field:
1. Phase 1: Growing window with 3-day threshold confirmation 1. Phase 1: Growing window with threshold confirmation
2. Phase 2: ±40 day refinement with argmax 2. Phase 2: ±40 day refinement with argmax
Args:
phase1_threshold (float): Probability threshold for Phase 1 (default 0.45, tuned for Model 307)
phase1_consecutive (int): Consecutive days required (default 2, reduced from 3 for robustness)
Returns list of dicts with field, season_start_date, season_end_date, etc. Returns list of dicts with field, season_start_date, season_end_date, etc.
""" """
if device is None: if device is None:
@ -411,6 +422,7 @@ def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=Non
harvests_found = 0 harvests_found = 0
print(f" Processing {total_fields} fields...") print(f" Processing {total_fields} fields...")
print(f" Phase 1 parameters: threshold={phase1_threshold}, consecutive_days={phase1_consecutive}")
for idx, (field, field_data) in enumerate(field_groups, 1): for idx, (field, field_data) in enumerate(field_groups, 1):
# Simple progress indicator # Simple progress indicator
@ -423,7 +435,8 @@ def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=Non
field_data = field_data.sort_values('Date').reset_index(drop=True) field_data = field_data.sort_values('Date').reset_index(drop=True)
# Phase 1: Growing window detection # Phase 1: Growing window detection
phase1_harvests = run_phase1_growing_window(field_data, model, config, scalers, ci_column, device) phase1_harvests = run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
threshold=phase1_threshold, consecutive_days=phase1_consecutive)
if not phase1_harvests: if not phase1_harvests:
continue continue
@ -475,7 +488,7 @@ def build_production_harvest_table(refined_results: List[Dict]) -> pd.DataFrame:
# Ensure date columns are datetime # Ensure date columns are datetime
df['season_start_date'] = pd.to_datetime(df['season_start_date']).dt.strftime('%Y-%m-%d') df['season_start_date'] = pd.to_datetime(df['season_start_date']).dt.strftime('%Y-%m-%d')
df['season_end_date'] = pd.to_datetime(df['season_end_date']).dt.strftime('%Y-%m-%d') df['season_end_date'] = pd.to_datetime(df['season_end_date']).dt.strftime('%Y-%m-%d')
df['phase1_harvest_date'] = pd.to_datetime(df['phase1_harvest_date']).dt.strftime('%Y-%m-%d') df['phase2_harvest_date'] = pd.to_datetime(df['phase2_harvest_date']).dt.strftime('%Y-%m-%d')
print(f"Built production table with {len(df)} field/season combinations") print(f"Built production table with {len(df)} field/season combinations")

View file

@ -402,4 +402,32 @@ cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n") cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n") cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# This metadata file is read by parameters_project.R to determine mosaic mode
# It allows script 40 to know what script 10 decided without re-computing
cat("\n[10] Writing tiling configuration metadata...\n")
config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")
config_json <- paste0(
'{\n',
' "project": "', PROJECT, '",\n',
' "has_tiles": ', tolower(N_TILES > 1), ',\n',
' "grid_size": "', GRID_SIZE_LABEL, '",\n',
' "grid_rows": ', GRID_NROWS, ',\n',
' "grid_cols": ', GRID_NCOLS, ',\n',
' "roi_width_km": ', round(x_range_m / 1000, 1), ',\n',
' "roi_height_km": ', round(y_range_m / 1000, 1), ',\n',
' "created_date": "', Sys.Date(), '",\n',
' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"\n',
'}\n'
)
writeLines(config_json, config_file)
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", tolower(N_TILES > 1), "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")
cat("\n✓ Script complete!\n") cat("\n✓ Script complete!\n")

View file

@ -884,15 +884,10 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin
names(blue_band) <- "Blue" names(blue_band) <- "Blue"
names(nir_band) <- "NIR" names(nir_band) <- "NIR"
# Create CI band # Create CI band inline: NIR/Green - 1
if (raster_info$type == "4b") { ci_band <- nir_band / green_band - 1
ci_band <- (nir_band - red_band) / (nir_band + red_band)
} else if (raster_info$type == "8b") {
red_edge <- tile_rast[[raster_info$red_idx]]
ci_band <- (nir_band - red_edge) / (nir_band + red_edge)
}
names(ci_band) <- "CI" names(ci_band) <- "CI"
# Create output raster with Red, Green, Blue, NIR, CI # Create output raster with Red, Green, Blue, NIR, CI
output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band) output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band)
names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI") names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI")
@ -1018,33 +1013,3 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C
return(aggregated) return(aggregated)
} }
#' Create CI band from available bands (if not pre-computed)
#'
#' @param raster Loaded raster object
#' @param raster_info Output from detect_raster_structure()
#' @return Single-layer raster with CI band
#'
create_ci_band <- function(raster, raster_info) {
if (raster_info$type == "4b") {
# Calculate NDVI for 4-band data: (NIR - Red) / (NIR + Red)
red <- raster[[raster_info$red_idx]]
nir <- raster[[raster_info$nir_idx]]
ci <- (nir - red) / (nir + red)
} else if (raster_info$type == "8b") {
# Use RedEdge for 8-band data: (NIR - RedEdge) / (NIR + RedEdge)
red_edge <- raster[[raster_info$red_idx]]
nir <- raster[[raster_info$nir_idx]]
ci <- (nir - red_edge) / (nir + red_edge)
} else {
stop("Unsupported raster type")
}
# Apply cloud mask if available (UDM band)
if (!is.na(raster_info$udm_idx)) {
udm <- raster[[raster_info$udm_idx]]
ci <- terra::mask(ci, udm, maskvalues = 0)
}
return(ci)
}

View file

@ -127,29 +127,68 @@ main <- function() {
safe_log(paste("Output will be saved as:", file_name_tif)) safe_log(paste("Output will be saved as:", file_name_tif))
# 5. Create weekly per-tile MAX mosaics # 5. Create weekly mosaics - route based on project tile detection
# ---------------------------------- # ---------------------------------------------------------------
# The use_tile_mosaic flag is auto-detected by parameters_project.R
# based on whether tiles exist in merged_final_tif/
tryCatch({ if (!exists("use_tile_mosaic")) {
safe_log("Starting per-tile mosaic creation...") # Fallback detection if flag not set (shouldn't happen)
merged_final_dir <- file.path(laravel_storage, "merged_final_tif")
tile_detection <- detect_mosaic_mode(merged_final_dir)
use_tile_mosaic <- tile_detection$has_tiles
}
if (use_tile_mosaic) {
# TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
# This is used for projects like Angata with large ROIs requiring spatial partitioning
tryCatch({
safe_log("Starting per-tile mosaic creation (tile-based approach)...")
# Set output directory for per-tile mosaics
tile_output_base <- file.path(laravel_storage, "weekly_tile_max")
created_tile_files <- create_weekly_mosaic_from_tiles(
dates = dates,
merged_final_dir = merged_final,
tile_output_dir = tile_output_base,
field_boundaries = field_boundaries
)
safe_log(paste("✓ Per-tile mosaic creation completed - created",
length(created_tile_files), "tile files"))
}, error = function(e) {
safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR")
traceback()
stop("Mosaic creation failed")
})
# Set output directory for per-tile mosaics } else {
tile_output_base <- file.path(laravel_storage, "weekly_tile_max") # SINGLE-FILE APPROACH: Create single weekly mosaic file
# This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output
created_tile_files <- create_weekly_mosaic_from_tiles( tryCatch({
dates = dates, safe_log("Starting single-file mosaic creation (backward-compatible approach)...")
merged_final_dir = merged_final,
tile_output_dir = tile_output_base, # Set output directory for single-file mosaics
field_boundaries = field_boundaries single_file_output_dir <- file.path(laravel_storage, "weekly_mosaic")
)
created_file <- create_weekly_mosaic(
safe_log(paste("✓ Per-tile mosaic creation completed - created", dates = dates,
length(created_tile_files), "tile files")) field_boundaries = field_boundaries,
}, error = function(e) { daily_vrt_dir = daily_vrt,
safe_log(paste("ERROR in mosaic creation:", e$message), "WARNING") merged_final_dir = merged_final,
traceback() output_dir = single_file_output_dir,
stop("Mosaic creation failed") file_name_tif = file_name_tif,
}) create_plots = FALSE
)
safe_log(paste("✓ Single-file mosaic creation completed:", created_file))
}, error = function(e) {
safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR")
traceback()
stop("Mosaic creation failed")
})
}
} }
if (sys.nframe() == 0) { if (sys.nframe() == 0) {

View file

@ -3,6 +3,38 @@
# Utility functions for creating weekly mosaics from daily satellite imagery. # Utility functions for creating weekly mosaics from daily satellite imagery.
# These functions support cloud cover assessment, date handling, and mosaic creation. # These functions support cloud cover assessment, date handling, and mosaic creation.
#' Detect whether a project uses tile-based or single-file mosaic approach
#'
#' @param merged_final_tif_dir Directory containing merged_final_tif files
#' @return List with has_tiles (logical), detected_tiles (vector), total_files (count)
#'
detect_mosaic_mode <- function(merged_final_tif_dir) {
# Check if directory exists
if (!dir.exists(merged_final_tif_dir)) {
return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0))
}
# List all .tif files in merged_final_tif
tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
if (length(tif_files) == 0) {
return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0))
}
# Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits)
# Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif
tile_pattern <- "_(\\d{2})\\.tif$"
tile_files <- tif_files[grepl(tile_pattern, tif_files)]
has_tiles <- length(tile_files) > 0
return(list(
has_tiles = has_tiles,
detected_tiles = tile_files,
total_files = length(tif_files)
))
}
#' Safe logging function #' Safe logging function
#' @param message The message to log #' @param message The message to log
#' @param level The log level (default: "INFO") #' @param level The log level (default: "INFO")

View file

@ -13,8 +13,81 @@ suppressPackageStartupMessages({
library(sf) library(sf)
library(dplyr) library(dplyr)
library(tidyr) library(tidyr)
library(jsonlite) # For reading tiling_config.json
}) })
# 2. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
# PRIORITY 1: Check for tiling_config.json metadata file from script 10
# This is the most reliable source since script 10 explicitly records its decision
if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
# Try to find tiling_config.json in any grid-size subfolder
config_files <- list.files(daily_tiles_split_dir,
pattern = "tiling_config\\.json$",
recursive = TRUE,
full.names = TRUE)
if (length(config_files) > 0) {
# Found a config file - use the most recent one
config_file <- config_files[which.max(file.info(config_files)$mtime)]
tryCatch({
config_json <- jsonlite::read_json(config_file)
return(list(
has_tiles = config_json$has_tiles %||% TRUE,
detected_tiles = character(),
total_files = 0,
source = "tiling_config.json",
grid_size = config_json$grid_size %||% "unknown"
))
}, error = function(e) {
warning("Error reading tiling_config.json: ", e$message)
# Fall through to file-based detection
})
}
}
# PRIORITY 2: File-based detection (fallback if metadata not found)
# Check if merged_final_tif/ contains tile-named files
if (!dir.exists(merged_final_tif_dir)) {
return(list(
has_tiles = FALSE,
detected_tiles = character(),
total_files = 0,
source = "directory_not_found"
))
}
# List all .tif files in merged_final_tif
tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
if (length(tif_files) == 0) {
return(list(
has_tiles = FALSE,
detected_tiles = character(),
total_files = 0,
source = "no_files_found"
))
}
# Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits)
# Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif
tile_pattern <- "_(\\d{2})\\.tif$"
tile_files <- tif_files[grepl(tile_pattern, tif_files)]
has_tiles <- length(tile_files) > 0
return(list(
has_tiles = has_tiles,
detected_tiles = tile_files,
total_files = length(tif_files),
source = "file_pattern_detection"
))
}
# 2. Define project directory structure # 2. Define project directory structure
# ----------------------------------- # -----------------------------------
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") { setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
@ -26,6 +99,16 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
# Alternative: merged_tif for 4-band legacy data # Alternative: merged_tif for 4-band legacy data
merged_tif_folder <- here(laravel_storage_dir, data_source) merged_tif_folder <- here(laravel_storage_dir, data_source)
# Detect tile mode based on metadata from script 10 or file patterns
merged_final_dir <- here(laravel_storage_dir, "merged_final_tif")
daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
tile_detection <- detect_mosaic_mode(
merged_final_tif_dir = merged_final_dir,
daily_tiles_split_dir = daily_tiles_split_dir
)
use_tile_mosaic <- tile_detection$has_tiles
# Main subdirectories # Main subdirectories
dirs <- list( dirs <- list(
reports = here(laravel_storage_dir, "reports"), reports = here(laravel_storage_dir, "reports"),
@ -33,7 +116,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
data = here(laravel_storage_dir, "Data"), data = here(laravel_storage_dir, "Data"),
tif = list( tif = list(
merged = merged_tif_folder, # Use data_source parameter to select folder merged = merged_tif_folder, # Use data_source parameter to select folder
final = here(laravel_storage_dir, "merged_final_tif") final = merged_final_dir
), ),
weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"), weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"),
weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"), weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"),
@ -61,9 +144,17 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b"
merged_final = dirs$tif$final, merged_final = dirs$tif$final,
daily_CI_vals_dir = dirs$extracted_ci$daily, daily_CI_vals_dir = dirs$extracted_ci$daily,
cumulative_CI_vals_dir = dirs$extracted_ci$cumulative, cumulative_CI_vals_dir = dirs$extracted_ci$cumulative,
weekly_CI_mosaic = dirs$weekly_mosaic, weekly_CI_mosaic = if (use_tile_mosaic) dirs$weekly_tile_max else dirs$weekly_mosaic, # SMART: Route based on tile detection
daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction
weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output) weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output)
use_tile_mosaic = use_tile_mosaic, # Flag indicating if tiles are used for this project
tile_detection_info = list(
has_tiles = tile_detection$has_tiles,
detected_source = tile_detection$source,
detected_count = tile_detection$total_files,
grid_size = tile_detection$grid_size %||% "unknown",
sample_tiles = head(tile_detection$detected_tiles, 3)
),
harvest_dir = dirs$harvest, harvest_dir = dirs$harvest,
extracted_CI_dir = dirs$extracted_ci$base extracted_CI_dir = dirs$extracted_ci$base
)) ))
@ -329,8 +420,20 @@ if (exists("project_dir")) {
# Expose all variables to the global environment # Expose all variables to the global environment
list2env(project_config, envir = .GlobalEnv) list2env(project_config, envir = .GlobalEnv)
# Log project initialization completion # Log project initialization completion with tile mode info
log_message(paste("Project initialized with directory:", project_dir)) log_message(paste("Project initialized with directory:", project_dir))
if (exists("use_tile_mosaic")) {
mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
log_message(paste("Mosaic mode detected:", mosaic_mode))
if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
log_message(paste(" - Detection source:", tile_detection_info$detected_source))
log_message(paste(" - Grid size:", tile_detection_info$grid_size))
log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
if (length(tile_detection_info$sample_tiles) > 0) {
log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
}
}
}
} else { } else {
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R") warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
} }

View file

@ -703,9 +703,30 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
target_year <- lubridate::isoyear(target_date) target_year <- lubridate::isoyear(target_date)
# Generate the file path for the target week # Primary approach: Try single-file mosaic path first
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif")) path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
# Try to locate tile-based mosaics in weekly_tile_max instead
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
if (dir.exists(tile_mosaic_path)) {
tile_files <- list.files(tile_mosaic_path,
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
full.names = TRUE)
if (length(tile_files) > 0) {
# Found tiles - return the first tile as primary, note that multiple tiles exist
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
# Return first tile - caller should aggregate if needed
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
}
}
}
# Log the path calculation # Log the path calculation
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO") safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")

View file

@ -1,3 +0,0 @@
# Wrapper script to set project_dir and run KPI calculation
project_dir <- "esa"
source("r_app/09_calculate_kpis.R")

View file

View file

@ -1,35 +0,0 @@
# Simple SAR Data Test
# ====================
cat("Testing SAR data loading...\n")
# Load only essential libraries
library(terra, quietly = TRUE)
# Test loading one SAR file
sar_file <- "python_scripts/data/aura/weekly_SAR_mosaic/week_33_2025_VV_dB_filtered.tif"
if (file.exists(sar_file)) {
cat("✓ SAR file found:", sar_file, "\n")
# Load the raster
sar_data <- rast(sar_file)
cat("✓ SAR data loaded successfully\n")
cat(" Dimensions:", dim(sar_data), "\n")
cat(" CRS:", crs(sar_data), "\n")
cat(" Value range:", round(global(sar_data, range, na.rm = TRUE)[,1], 2), "dB\n")
# Test basic statistics
mean_val <- global(sar_data, mean, na.rm = TRUE)[1,1]
sd_val <- global(sar_data, sd, na.rm = TRUE)[1,1]
cat(" Mean backscatter:", round(mean_val, 2), "dB\n")
cat(" Standard deviation:", round(sd_val, 2), "dB\n")
cat("\n✓ SAR data test successful!\n")
cat("Ready to proceed with full analysis.\n")
} else {
cat("✗ SAR file not found:", sar_file, "\n")
}