SmartCane/data_validation_tool/validator.js
2026-01-06 14:17:37 +01:00

699 lines
25 KiB
JavaScript

// Configuration
const CONFIG = {
REQUIRED_EXCEL_COLUMNS: ['field', 'sub_field', 'year', 'season_start', 'season_end', 'tonnage_ha'],
OPTIONAL_EXCEL_COLUMNS: ['age', 'sub_area'], // age is calculated in script, sub_area is optional
REQUIRED_GEOJSON_PROPERTIES: ['field', 'sub_field'],
VALID_CRS: 'EPSG:32736', // UTM 36S - the correct CRS we learned from the conversation
CRS_DESCRIPTION: 'EPSG:32736 (UTM Zone 36S) - This is the correct CRS learned from geospatial analysis of Angata farm coordinates'
};
let excelData = null;
let geojsonData = null;
let excelLoaded = false;
let geojsonLoaded = false;
// File input handlers
document.getElementById('excelFile').addEventListener('change', handleExcelFile);
document.getElementById('geojsonFile').addEventListener('change', handleGeojsonFile);
document.getElementById('checkButton').addEventListener('click', validateData);
function updateCheckButton() {
const checkButton = document.getElementById('checkButton');
if (excelLoaded && geojsonLoaded) {
checkButton.style.display = 'inline-block';
} else {
checkButton.style.display = 'none';
}
}
// Drag and drop handlers for Excel
const excelDropZone = document.getElementById('excelDropZone');
excelDropZone.addEventListener('dragover', (e) => {
e.preventDefault();
e.stopPropagation();
excelDropZone.style.backgroundColor = '#f0f1ff';
});
excelDropZone.addEventListener('dragleave', (e) => {
e.preventDefault();
e.stopPropagation();
excelDropZone.style.backgroundColor = 'transparent';
});
excelDropZone.addEventListener('drop', (e) => {
e.preventDefault();
e.stopPropagation();
excelDropZone.style.backgroundColor = 'transparent';
const files = e.dataTransfer.files;
if (files.length > 0) {
document.getElementById('excelFile').files = files;
handleExcelFile({ target: { files: files } });
}
});
// Drag and drop handlers for GeoJSON
const geojsonDropZone = document.getElementById('geojsonDropZone');
geojsonDropZone.addEventListener('dragover', (e) => {
e.preventDefault();
e.stopPropagation();
geojsonDropZone.style.backgroundColor = '#f0f1ff';
});
geojsonDropZone.addEventListener('dragleave', (e) => {
e.preventDefault();
e.stopPropagation();
geojsonDropZone.style.backgroundColor = 'transparent';
});
geojsonDropZone.addEventListener('drop', (e) => {
e.preventDefault();
e.stopPropagation();
geojsonDropZone.style.backgroundColor = 'transparent';
const files = e.dataTransfer.files;
if (files.length > 0) {
document.getElementById('geojsonFile').files = files;
handleGeojsonFile({ target: { files: files } });
}
});
function handleExcelFile(e) {
const file = e.target.files[0];
if (!file) return;
document.getElementById('excelFileName').textContent = `${file.name}`;
const reader = new FileReader();
reader.onload = (event) => {
try {
const data = new Uint8Array(event.target.result);
const workbook = XLSX.read(data, { type: 'array' });
const worksheet = workbook.Sheets[workbook.SheetNames[0]];
excelData = XLSX.utils.sheet_to_json(worksheet);
excelLoaded = true;
updateCheckButton();
} catch (error) {
document.getElementById('excelFileName').textContent = `✗ Error: ${error.message}`;
excelLoaded = false;
updateCheckButton();
}
};
reader.onerror = () => {
document.getElementById('excelFileName').textContent = `✗ Failed to read file`;
excelLoaded = false;
updateCheckButton();
};
reader.readAsArrayBuffer(file);
}
function handleGeojsonFile(e) {
const file = e.target.files[0];
if (!file) return;
document.getElementById('geojsonFileName').textContent = `${file.name}`;
const reader = new FileReader();
reader.onload = (event) => {
try {
geojsonData = JSON.parse(event.target.result);
geojsonLoaded = true;
updateCheckButton();
} catch (error) {
document.getElementById('geojsonFileName').textContent = `✗ Invalid JSON: ${error.message}`;
geojsonLoaded = false;
updateCheckButton();
}
};
reader.onerror = () => {
document.getElementById('geojsonFileName').textContent = `✗ Failed to read file`;
geojsonLoaded = false;
updateCheckButton();
};
reader.readAsText(file);
}
function validateData() {
if (!excelData || !geojsonData) {
alert('Please upload both Excel and GeoJSON files before checking.');
return;
}
const results = {
checks: [],
details: []
};
// 1. Excel column validation
const excelColumnCheck = validateExcelColumns();
results.checks.push(excelColumnCheck);
results.details.push(excelColumnCheck.details);
// 2. GeoJSON properties validation
const geojsonPropsCheck = validateGeojsonProperties();
results.checks.push(geojsonPropsCheck);
results.details.push(geojsonPropsCheck.details);
// 3. CRS validation
const crsCheck = validateCRS();
results.checks.push(crsCheck);
results.details.push(crsCheck.details);
// 4. Field name matching
const fieldMatchCheck = validateFieldMatching();
results.checks.push(fieldMatchCheck);
results.details.push(fieldMatchCheck.details);
// 5. Data type and content validation
const dataValidationCheck = validateDataTypes();
results.checks.push(dataValidationCheck);
results.details.push(dataValidationCheck.details);
displayResults(results);
}
function validateExcelColumns() {
const excelColumns = Object.keys(excelData[0] || {});
const missing = CONFIG.REQUIRED_EXCEL_COLUMNS.filter(col => !excelColumns.includes(col));
const hasOptional = CONFIG.OPTIONAL_EXCEL_COLUMNS.filter(col => excelColumns.includes(col));
const notRequired = excelColumns.filter(col => !CONFIG.REQUIRED_EXCEL_COLUMNS.includes(col) && !CONFIG.OPTIONAL_EXCEL_COLUMNS.includes(col));
let status = 'pass';
let message = 'All required columns present';
if (missing.length > 0) {
status = 'fail';
message = `Missing required columns: ${missing.join(', ')}`;
} else if (notRequired.length > 0) {
status = 'warning';
message = `Extra columns detected (will be ignored): ${notRequired.join(', ')}`;
}
return {
name: 'Excel Columns',
status: status,
message: message,
details: {
title: 'Excel Column Validation',
type: 'columns',
required: CONFIG.REQUIRED_EXCEL_COLUMNS,
optional: CONFIG.OPTIONAL_EXCEL_COLUMNS,
found: excelColumns,
missing: missing,
hasOptional: hasOptional,
extra: notRequired
}
};
}
function validateGeojsonProperties() {
if (!geojsonData.features || geojsonData.features.length === 0) {
return {
name: 'GeoJSON Properties',
status: 'fail',
message: 'GeoJSON has no features',
details: {
title: 'GeoJSON Property Validation',
type: 'properties',
error: 'No features found in GeoJSON'
}
};
}
const allProperties = new Set();
const missingInFeatures = [];
geojsonData.features.forEach((feature, idx) => {
const props = feature.properties || {};
Object.keys(props).forEach(p => allProperties.add(p));
CONFIG.REQUIRED_GEOJSON_PROPERTIES.forEach(reqProp => {
if (!props[reqProp]) {
missingInFeatures.push({ feature: idx, property: reqProp, field: props.field || 'Unknown' });
}
});
});
const extra = Array.from(allProperties).filter(p => !CONFIG.REQUIRED_GEOJSON_PROPERTIES.includes(p));
let status = 'pass';
let message = 'All required properties present in all features';
if (missingInFeatures.length > 0) {
status = 'fail';
message = `Missing properties in ${missingInFeatures.length} feature(s)`;
} else if (extra.length > 0) {
status = 'warning';
message = `Extra properties detected: ${extra.join(', ')}`;
}
return {
name: 'GeoJSON Properties',
status: status,
message: message,
details: {
title: 'GeoJSON Property Validation',
type: 'properties',
required: CONFIG.REQUIRED_GEOJSON_PROPERTIES,
found: Array.from(allProperties),
extra: extra,
missingInFeatures: missingInFeatures
}
};
}
function validateCRS() {
const crs = geojsonData.crs;
let detectedCRS = 'Not specified';
let status = 'fail';
let message = `CRS not specified. Expected: ${CONFIG.VALID_CRS}`;
if (crs) {
if (crs.type === 'name' && crs.properties?.name) {
detectedCRS = crs.properties.name;
// Check for various CRS string formats
if (detectedCRS.includes('32736') || detectedCRS.includes('UTM') && detectedCRS.includes('36')) {
status = 'pass';
message = `✓ Correct CRS detected: ${detectedCRS}`;
} else {
status = 'fail';
message = `Wrong CRS: ${detectedCRS}. Expected: ${CONFIG.VALID_CRS}`;
}
}
}
return {
name: 'Coordinate Reference System',
status: status,
message: message,
details: {
title: 'CRS Validation',
type: 'crs',
expected: CONFIG.VALID_CRS,
description: CONFIG.CRS_DESCRIPTION,
detected: detectedCRS,
crsObject: crs
}
};
}
function validateFieldMatching() {
const excelFields = new Set(excelData.map(row => String(row.field).trim()));
const geojsonFields = new Set(geojsonData.features.map(f => String(f.properties.field).trim()));
const matchingFields = Array.from(excelFields).filter(f => geojsonFields.has(f));
const excelOnly = Array.from(excelFields).filter(f => !geojsonFields.has(f));
const geojsonOnly = Array.from(geojsonFields).filter(f => !excelFields.has(f));
let status = 'pass';
let message = 'All field names match between Excel and GeoJSON';
if (excelOnly.length > 0 || geojsonOnly.length > 0) {
status = 'fail';
message = `Field name mismatches detected: ${excelOnly.length} in Excel only, ${geojsonOnly.length} in GeoJSON only`;
}
// Create matching table
const matchingTable = [];
excelFields.forEach(field => {
const inGeojson = geojsonFields.has(field);
matchingTable.push({
field: field,
excel: true,
geojson: inGeojson,
status: inGeojson ? 'match' : 'mismatch'
});
});
geojsonOnly.forEach(field => {
matchingTable.push({
field: field,
excel: false,
geojson: true,
status: 'mismatch'
});
});
return {
name: 'Field Name Matching',
status: status,
message: message,
details: {
title: 'Field Name Matching',
type: 'fieldMatching',
matching: matchingFields,
excelOnly: excelOnly,
geojsonOnly: geojsonOnly,
matchingTable: matchingTable
}
};
}
function validateDataTypes() {
const issues = [];
const missingDates = [];
const invalidYears = [];
const invalidNumerics = [];
excelData.forEach((row, idx) => {
// Check season_start
if (!row.season_start || row.season_start === '') {
missingDates.push({ row: idx + 2, field: row.field, column: 'season_start' });
} else if (!isValidDate(row.season_start)) {
invalidYears.push({ row: idx + 2, field: row.field, column: 'season_start', value: row.season_start });
}
// Check year
if (!Number.isInteger(parseFloat(row.year))) {
invalidYears.push({ row: idx + 2, field: row.field, column: 'year', value: row.year });
}
// Check numeric columns (age is optional, sub_area is text, not numeric)
['tonnage_ha'].forEach(col => {
const val = row[col];
if (val !== '' && val !== null && isNaN(parseFloat(val))) {
invalidNumerics.push({ row: idx + 2, field: row.field, column: col, value: val });
}
});
});
let status = 'pass';
let message = 'All data types valid';
if (missingDates.length > 0 || invalidYears.length > 0 || invalidNumerics.length > 0) {
status = 'warning';
message = `Data validation issues found: ${missingDates.length} missing dates, ${invalidYears.length} invalid years/dates, ${invalidNumerics.length} invalid numerics`;
}
return {
name: 'Data Validation',
status: status,
message: message,
details: {
title: 'Data Type & Content Validation',
type: 'dataValidation',
missingDates: missingDates,
invalidYears: invalidYears,
invalidNumerics: invalidNumerics
}
};
}
function isValidDate(dateString) {
if (!dateString) return false;
const date = new Date(dateString);
return date instanceof Date && !isNaN(date);
}
function displayResults(results) {
const trafficLight = document.getElementById('trafficLight');
const detailsSection = document.getElementById('detailsSection');
const resultsSection = document.getElementById('resultsSection');
trafficLight.innerHTML = '';
detailsSection.innerHTML = '';
// Display traffic lights
results.checks.forEach(check => {
const light = document.createElement('div');
light.className = `check-item ${check.status}`;
light.innerHTML = `
<span class="light ${check.status === 'pass' ? 'green' : check.status === 'warning' ? 'yellow' : 'red'}"></span>
<div>
<strong>${check.name}</strong>
<div style="font-size: 13px; margin-top: 4px;">${check.message}</div>
</div>
`;
trafficLight.appendChild(light);
});
// Display details
results.details.forEach(detail => {
if (detail.type === 'columns') {
detailsSection.appendChild(createColumnDetails(detail));
} else if (detail.type === 'properties') {
detailsSection.appendChild(createPropertiesDetails(detail));
} else if (detail.type === 'crs') {
detailsSection.appendChild(createCRSDetails(detail));
} else if (detail.type === 'fieldMatching') {
detailsSection.appendChild(createFieldMatchingDetails(detail));
} else if (detail.type === 'dataValidation') {
detailsSection.appendChild(createDataValidationDetails(detail));
}
});
resultsSection.classList.add('show');
}
function createColumnDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
// Required columns
section.innerHTML += `
<div style="margin-bottom: 15px;">
<strong>Required Columns:</strong>
<div class="field-list" style="margin-top: 8px;">
${detail.required.map(col => `<div class="field-badge" style="border-left-color: #28a745; background: #d4edda; color: #155724;">${col}</div>`).join('')}
</div>
</div>
`;
// Optional columns
if (detail.optional && detail.optional.length > 0) {
section.innerHTML += `
<div style="margin-bottom: 15px;">
<strong>Optional Columns (not required):</strong>
<div class="field-list" style="margin-top: 8px;">
${detail.optional.map(col => `<div class="field-badge" style="border-left-color: #17a2b8; background: #d1ecf1; color: #0c5460;">${col}</div>`).join('')}
</div>
<small style="display: block; margin-top: 8px;">✓ <em>${detail.optional.join(', ')} ${detail.optional.length === 1 ? 'is' : 'are'} calculated in the system or optional</em></small>
</div>
`;
}
if (detail.missing.length > 0) {
section.innerHTML += `
<div class="message-box error">
<strong>❌ Missing Required Columns:</strong><br>${detail.missing.join(', ')}
</div>
`;
}
if (detail.extra.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong>⚠️ Extra Columns (will be ignored):</strong><br>${detail.extra.join(', ')}
</div>
`;
}
if (detail.missing.length === 0 && detail.extra.length === 0) {
section.innerHTML += `
<div class="message-box success">
<strong>✓ Perfect!</strong> All required columns present.
</div>
`;
}
return section;
}
function createPropertiesDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
if (detail.error) {
section.innerHTML += `<div class="message-box error">${detail.error}</div>`;
return section;
}
if (detail.missingInFeatures && detail.missingInFeatures.length > 0) {
section.innerHTML += `
<div class="message-box error">
<strong>❌ Missing Properties in Features:</strong>
<table>
<tr><th>Feature #</th><th>Field Name</th><th>Missing Property</th></tr>
${detail.missingInFeatures.map(m => `<tr><td>${m.feature}</td><td>${m.field}</td><td>${m.property}</td></tr>`).join('')}
</table>
</div>
`;
}
if (detail.extra && detail.extra.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong>⚠️ Extra Properties (redundant):</strong><br>${detail.extra.join(', ')}<br>
<small>These will be ignored during processing.</small>
</div>
`;
}
if ((!detail.missingInFeatures || detail.missingInFeatures.length === 0) && (!detail.extra || detail.extra.length === 0)) {
section.innerHTML += `
<div class="message-box success">
<strong>✓ Perfect!</strong> All required properties present in all ${geojsonData.features.length} features.
</div>
`;
}
return section;
}
function createCRSDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
if (detail.detected === 'Not specified') {
section.innerHTML += `
<div class="message-box error">
<strong>❌ CRS Not Specified</strong><br>
Expected: <code>${detail.expected}</code><br>
${detail.description}
</div>
`;
} else if (detail.detected.includes('32736') || (detail.detected.includes('UTM') && detail.detected.includes('36'))) {
section.innerHTML += `
<div class="message-box success">
<strong>✓ Correct CRS</strong><br>
Detected: <code>${detail.detected}</code><br>
${detail.description}
</div>
`;
} else {
section.innerHTML += `
<div class="message-box error">
<strong>❌ Wrong CRS</strong><br>
Expected: <code>${detail.expected}</code><br>
Detected: <code>${detail.detected}</code><br>
${detail.description}
</div>
`;
}
if (detail.crsObject) {
section.innerHTML += `
<div style="margin-top: 15px; padding: 10px; background: #f8f9ff; border-radius: 4px; font-size: 12px;">
<strong>CRS Details:</strong><br>
<code>${JSON.stringify(detail.crsObject, null, 2)}</code>
</div>
`;
}
return section;
}
function createFieldMatchingDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
if (detail.excelOnly.length > 0) {
section.innerHTML += `
<div class="message-box error">
<strong>❌ Fields in Excel but NOT in GeoJSON (${detail.excelOnly.length}):</strong>
<div class="field-list">
${detail.excelOnly.map(f => `<div class="field-badge missing">${f}</div>`).join('')}
</div>
<small style="display: block; margin-top: 10px;">These fields exist in your harvest data but have no boundaries defined in the GeoJSON.</small>
</div>
`;
}
if (detail.geojsonOnly.length > 0) {
section.innerHTML += `
<div class="message-box error">
<strong>❌ Fields in GeoJSON but NOT in Excel (${detail.geojsonOnly.length}):</strong>
<div class="field-list">
${detail.geojsonOnly.map(f => `<div class="field-badge extra">${f}</div>`).join('')}
</div>
<small style="display: block; margin-top: 10px;">These fields have boundaries defined but no data in your harvest spreadsheet.</small>
</div>
`;
}
if (detail.matching.length > 0) {
section.innerHTML += `
<div class="message-box success">
<strong>✓ Matching Fields (${detail.matching.length}):</strong>
<div class="field-list">
${detail.matching.map(f => `<div class="field-badge" style="border-left-color: #28a745; background: #d4edda; color: #155724;">${f}</div>`).join('')}
</div>
</div>
`;
}
// Full matching table
section.innerHTML += `
<div style="margin-top: 20px;">
<strong>Complete Field Summary:</strong>
<table>
<tr>
<th>Field Name</th>
<th>In Excel</th>
<th>In GeoJSON</th>
<th>Status</th>
</tr>
${detail.matchingTable.map(row => `
<tr>
<td><strong>${row.field}</strong></td>
<td>${row.excel ? '✓' : '✗'}</td>
<td>${row.geojson ? '✓' : '✗'}</td>
<td><span class="${row.status}">${row.status === 'match' ? '🟢 Match' : '🔴 Mismatch'}</span></td>
</tr>
`).join('')}
</table>
</div>
`;
return section;
}
function createDataValidationDetails(detail) {
const section = document.createElement('div');
section.innerHTML = `<h3>${detail.title}</h3>`;
if (detail.missingDates.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong>⚠️ Missing season_start dates (${detail.missingDates.length}):</strong>
<table style="font-size: 13px;">
<tr><th>Row #</th><th>Field Name</th></tr>
${detail.missingDates.map(m => `<tr><td>${m.row}</td><td>${m.field}</td></tr>`).join('')}
</table>
</div>
`;
}
if (detail.invalidYears.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong>⚠️ Invalid dates/years (${detail.invalidYears.length}):</strong>
<table style="font-size: 13px;">
<tr><th>Row #</th><th>Field Name</th><th>Column</th><th>Value</th></tr>
${detail.invalidYears.map(m => `<tr><td>${m.row}</td><td>${m.field}</td><td>${m.column}</td><td>${m.value}</td></tr>`).join('')}
</table>
</div>
`;
}
if (detail.invalidNumerics.length > 0) {
section.innerHTML += `
<div class="message-box warning">
<strong>⚠️ Invalid numeric values (${detail.invalidNumerics.length}):</strong>
<table style="font-size: 13px;">
<tr><th>Row #</th><th>Field Name</th><th>Column</th><th>Value</th></tr>
${detail.invalidNumerics.map(m => `<tr><td>${m.row}</td><td>${m.field}</td><td>${m.column}</td><td>${m.value}</td></tr>`).join('')}
</table>
</div>
`;
}
if (detail.missingDates.length === 0 && detail.invalidYears.length === 0 && detail.invalidNumerics.length === 0) {
section.innerHTML += `
<div class="message-box success">
<strong>✓ All data types valid!</strong> No missing dates or invalid values detected.
</div>
`;
}
return section;
}
function showError(fileType, message) {
alert(`${fileType} Error: ${message}`);
}