Skip to content

Commit 7e7d0c4

Browse files
authored
Validation (#141)
Added a validation mechanism, invoke by providing attribute type validators when setting up the import mapping. If none are specified, it defaults to everything being strings with no validation, matching current behaviour. The mapping results may now include errors that cause input rows to be rejected, or warnings where a row could be interpreted but there might be a problem with it (eg, an attribute value was malformed for the required type, but it was marked as optional anyway so we can live without it). Adding support to display these warnings/errors in the frontend will be a separate bit of work.
1 parent 3ea5f4c commit 7e7d0c4

File tree

7 files changed

+298
-10
lines changed

7 files changed

+298
-10
lines changed

fixtures/test-validation.csv

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
TEST SPREADSHEET,,,,,,
2+
required string,required number,optional string,optional number,,,
3+
Name,Age,Egginess,Weight,,Valid?,Comment
4+
Boris,13,High,5.5,,TRUE,All fields present
5+
,,,,,FALSE,All fields blank; row will be silently ignored
6+
Nelly,10,,,,TRUE,Optional fields absent
7+
Sid,twelve,,,,FALSE,Invalid required number
8+
,nine,,,,FALSE,"Missing required string, invalid required number"
9+
Dave,15,,unknown,,TRUE,Invalid optional number

fixtures/test-validation.ods

12.6 KB
Binary file not shown.

fixtures/test-validation.xlsx

6.33 KB
Binary file not shown.

lib/importer/attribute-types.js

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// "input values" are what's found in the input data, and will generally be
2+
// simple string/boolean/number/date/etc things. "attribute values" are the
3+
// actual attributes in our output data model and can be some arbitrary type.
4+
5+
// An attribute type is a function from an input value to a result.
6+
7+
class AttributeMappingResult {
8+
constructor(value, warnings, errors) {
9+
this.value = value;
10+
this.warnings = warnings || [];
11+
this.errors = errors;
12+
}
13+
14+
get valid() {
15+
return !this.errors;
16+
}
17+
18+
get empty() {
19+
return this.value !== undefined;
20+
}
21+
}
22+
23+
// These helpers define the three kinds of results:
24+
25+
function emptyMapping(warnings) {
26+
return new AttributeMappingResult(undefined, warnings);
27+
}
28+
29+
function successfulMapping(outputValue, warnings) {
30+
return new AttributeMappingResult(outputValue, warnings);
31+
}
32+
33+
function failedMapping(warnings, errors) {
34+
return new AttributeMappingResult(undefined, warnings, errors);
35+
}
36+
37+
// Create an optional version of an existing type, that allows empty strings or
38+
// undefined values and maps then to "undefined", and converts any validation errors to warnings
39+
exports.optionalType = (baseType) => {
40+
return (inputValue) => {
41+
if(inputValue !== undefined && inputValue != "") {
42+
const result = baseType(inputValue);
43+
if(result.valid) {
44+
return result;
45+
} else {
46+
// Convert any errors into warnings, as this is an optional field
47+
return emptyMapping(result.warnings.concat(result.errors));
48+
}
49+
}
50+
else {
51+
return emptyMapping();
52+
}
53+
};
54+
}
55+
56+
// Create a required version of an existing type, that reports an undefined or empty-string input value as an error
57+
exports.requiredType = (baseType) => {
58+
return (inputValue) => {
59+
if(inputValue !== undefined && inputValue != "") {
60+
return baseType(inputValue);
61+
}
62+
else {
63+
return failedMapping([], ["A value must be provided"]);
64+
}
65+
};
66+
}
67+
68+
// The default string type
69+
exports.basicStringType = (inputValue) => {
70+
return successfulMapping(String(inputValue));
71+
};
72+
73+
// The default numeric type
74+
exports.basicNumberType = (inputValue) => {
75+
const result = parseFloat(inputValue);
76+
if(isNaN(result)) {
77+
return failedMapping([], ["This is not a valid number"]);
78+
} else {
79+
return successfulMapping(result);
80+
}
81+
};
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
const attributeTypes = require('./attribute-types');
2+
3+
test('required basic string', () => {
4+
// Also tests the workings of the requiredType system
5+
const t = attributeTypes.requiredType(attributeTypes.basicStringType);
6+
7+
let r = t(undefined);
8+
expect(r.valid).toBeFalsy();
9+
expect(r.warnings).toMatchObject([]);
10+
expect(r.errors).toMatchObject(["A value must be provided"]);
11+
12+
r = t("");
13+
expect(r.valid).toBeFalsy();
14+
expect(r.warnings).toMatchObject([]);
15+
expect(r.errors).toMatchObject(["A value must be provided"]);
16+
17+
r = t("foo");
18+
expect(r.valid).toBeTruthy();
19+
expect(r.value).toEqual("foo");
20+
expect(r.warnings).toMatchObject([]);
21+
22+
r = t(123);
23+
expect(r.valid).toBeTruthy();
24+
expect(r.value).toEqual("123");
25+
expect(r.warnings).toMatchObject([]);
26+
});
27+
28+
test('optional basic string', () => {
29+
// Also tests the workings of the optionalType system
30+
const t = attributeTypes.optionalType(attributeTypes.basicStringType);
31+
32+
let r = t(undefined);
33+
expect(r.valid).toBeTruthy();
34+
expect(r.warnings).toMatchObject([]);
35+
expect(r.value).toEqual(undefined);
36+
37+
r = t("");
38+
expect(r.valid).toBeTruthy();
39+
expect(r.warnings).toMatchObject([]);
40+
expect(r.value).toEqual(undefined);
41+
42+
r = t("foo");
43+
expect(r.valid).toBeTruthy();
44+
expect(r.value).toEqual("foo");
45+
expect(r.warnings).toMatchObject([]);
46+
47+
r = t(123);
48+
expect(r.valid).toBeTruthy();
49+
expect(r.value).toEqual("123");
50+
expect(r.warnings).toMatchObject([]);
51+
});
52+
53+
test('required basic number', () => {
54+
const t = attributeTypes.requiredType(attributeTypes.basicNumberType);
55+
56+
let r = t("foo");
57+
expect(r.valid).toBeFalsy();
58+
expect(r.warnings).toMatchObject([]);
59+
expect(r.errors).toMatchObject(["This is not a valid number"]);
60+
61+
r = t(123);
62+
expect(r.valid).toBeTruthy();
63+
expect(r.value).toEqual(123);
64+
expect(r.warnings).toMatchObject([]);
65+
});
66+
67+
test('optional basic number', () => {
68+
// Also tests that optionalType converts errors into warnings
69+
const t = attributeTypes.optionalType(attributeTypes.basicNumberType);
70+
71+
let r = t("foo");
72+
expect(r.valid).toBeTruthy();
73+
expect(r.value).toBeUndefined();
74+
expect(r.warnings).toMatchObject(["This is not a valid number"]);
75+
76+
r = t(123);
77+
expect(r.valid).toBeTruthy();
78+
expect(r.value).toEqual(123);
79+
expect(r.warnings).toMatchObject([]);
80+
});

lib/importer/backend.js

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
const xlsx = require("xlsx");
22
const crypto = require("crypto");
33
const assert = require('node:assert').strict;
4+
const attributeTypes = require('./attribute-types');
45

56
// An implementation of the interface described in https://struct.register-dynamics.co.uk/trac/wiki/DataImporter/API
67

@@ -456,6 +457,14 @@ function validateMapping(range, mapping) {
456457
assert(attrSource >= 0);
457458
assert(attrSource < columnsInRange);
458459
}
460+
461+
// Output types are optional, we fall back to basicStringType if not specified
462+
if(mapping.attributeTypes) {
463+
// eslint-disable-next-line no-unused-vars
464+
for(const [attribute, attrType] of Object.entries(mapping.attributeTypes)) {
465+
assert(attrType instanceof Function);
466+
}
467+
}
459468
}
460469

461470
function mapCellValue(cell) {
@@ -479,6 +488,7 @@ exports.SessionPerformMappingJob = (sid, range, mapping) => {
479488
const data = sheet["!data"];
480489
const merges = sheet["!merges"];
481490
const attrMap = Object.entries(mapping.attributeMappings);
491+
const attrTypes = mapping.attributeTypes || {};
482492

483493
for(let rowIdx=range.start.row; rowIdx <= range.end.row; rowIdx++) {
484494
let row = getMergedRow(data, merges, rowIdx);
@@ -489,15 +499,14 @@ exports.SessionPerformMappingJob = (sid, range, mapping) => {
489499
if (row) {
490500
attrMap.forEach((element) => {
491501
const [attr, m] = element;
502+
492503
// For now, attribute mappings are just integer column offsets
493504
const inputColumn = range.start.column + m;
494-
if (inputColumn >= row.length) {
495-
// If a row is missing values at the end, this may be
496-
// represented as a "short" row array.
497-
record[attr] = undefined;
498-
} else {
499-
const cell = row[range.start.column + m];
500-
if(cell.v) {
505+
// If a row is missing values at the end, this may be
506+
// represented as a "short" row array.
507+
if (inputColumn < row.length) {
508+
const cell = row[inputColumn];
509+
if(cell && cell.v) {
501510
record[attr] = mapCellValue(cell);
502511
foundSomeValues = true;
503512
}
@@ -506,7 +515,35 @@ exports.SessionPerformMappingJob = (sid, range, mapping) => {
506515
}
507516

508517
if (foundSomeValues) {
509-
records.push(record);
518+
// Only if we found something do we validate and map the types
519+
let mappedRecord = {};
520+
521+
Object.entries(record).forEach((element) => {
522+
const [attr, inputVal] = element;
523+
const attrType = attrTypes[attr] || attributeTypes.basicStringType;
524+
525+
const result = attrType(inputVal);
526+
527+
result.warnings.forEach((text) => {
528+
rowWarnings.push([attr,text]);
529+
});
530+
531+
if(result.valid) {
532+
// Succeeded, but maybe an empty result
533+
if(result.value !== undefined) {
534+
mappedRecord[attr] = result.value;
535+
}
536+
} else {
537+
// Failed
538+
result.errors.forEach((text) => {
539+
rowErrors.push([attr,text]);
540+
});
541+
}
542+
});
543+
544+
if (rowErrors.length == 0) {
545+
records.push(mappedRecord);
546+
}
510547
}
511548

512549
if(rowWarnings.length > 0) {

lib/importer/backend.test.js

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
const backend = require('./backend');
2-
2+
const attributeTypes = require('./attribute-types');
33

44
const testFiles = new Map([
55
["test", [
@@ -16,7 +16,12 @@ const testFiles = new Map([
1616
["tribbles.xlsx", "My lovely tribbles"],
1717
["tribbles.csv", "Sheet1"],
1818
["tribbles.ods", "My lovely tribbles"],
19-
]]
19+
]],
20+
["test-validation",
21+
[["test-validation.xlsx", "Cool Data"]],
22+
[["test-validation.csv", "Sheet1"]],
23+
[["test-validation.ods", "Cool Data"]],
24+
]
2025
]);
2126

2227
const prefix_fixture = ([fixture_name, sheetname]) => {
@@ -74,6 +79,11 @@ describe("Backend tests", () => {
7479
Age: 1,
7580
Egginess: 2
7681
},
82+
attributeTypes: {
83+
Name: attributeTypes.requiredType(attributeTypes.basicStringType),
84+
Age: attributeTypes.requiredType(attributeTypes.basicNumberType),
85+
Egginess: attributeTypes.requiredType(attributeTypes.basicStringType)
86+
},
7787
};
7888

7989
// Do a mapping
@@ -208,6 +218,11 @@ describe("Backend tests", () => {
208218
Age: 2,
209219
Egginess: 3
210220
},
221+
attributeTypes: {
222+
Name: attributeTypes.requiredType(attributeTypes.basicStringType),
223+
Age: attributeTypes.requiredType(attributeTypes.basicNumberType),
224+
Egginess: attributeTypes.requiredType(attributeTypes.basicStringType)
225+
}
211226
};
212227

213228
const jid = backend.SessionPerformMappingJob(sid, dataRange, mapping);
@@ -338,6 +353,11 @@ describe("Backend tests", () => {
338353
Age: 1,
339354
Egginess: 2
340355
},
356+
attributeTypes: {
357+
Name: attributeTypes.requiredType(attributeTypes.basicStringType),
358+
Age: attributeTypes.requiredType(attributeTypes.basicNumberType),
359+
Egginess: attributeTypes.requiredType(attributeTypes.basicStringType)
360+
},
341361
};
342362

343363
const jid = backend.SessionPerformMappingJob(sid, dataRange, mapping);
@@ -445,3 +465,64 @@ describe("Backend tests", () => {
445465
});
446466

447467
});
468+
469+
test('validation', () => {
470+
const fixtures = testFiles.get("test-validation").map(prefix_fixture)
471+
for( const [filename, sheetname] of fixtures) {
472+
const sid = backend.CreateSession();
473+
backend.SessionSetFile(sid, filename);
474+
475+
const dataRange = {
476+
sheet: sheetname,
477+
start: {row: 3, column: 0},
478+
end: {row: 8, column: 3}};
479+
480+
const mapping = {
481+
attributeMappings: {
482+
Name: 0,
483+
Age: 1,
484+
Egginess: 2,
485+
Weight: 3,
486+
},
487+
attributeTypes: {
488+
Name: attributeTypes.requiredType(attributeTypes.basicStringType),
489+
Age: attributeTypes.requiredType(attributeTypes.basicNumberType),
490+
Egginess: attributeTypes.optionalType(attributeTypes.basicStringType),
491+
Weight: attributeTypes.optionalType(attributeTypes.basicNumberType)
492+
}
493+
};
494+
495+
// Do a mapping
496+
497+
const jid = backend.SessionPerformMappingJob(sid, dataRange, mapping);
498+
499+
// Look at the job results
500+
501+
const jobSummary = backend.JobGetSummary(jid);
502+
expect(jobSummary).toMatchObject({
503+
recordCount: 3,
504+
errorCount: 2,
505+
warningCount: 1
506+
});
507+
508+
const warnings = backend.JobGetWarnings(jid); // Weight is optional so errors here become warnings
509+
expect(warnings).toMatchObject(new Map([[8, [['Weight', 'This is not a valid number']]]]));
510+
511+
const errors = backend.JobGetErrors(jid);
512+
expect(errors).toMatchObject(new Map([[6, [['Age', 'This is not a valid number']]],
513+
[7, [['Age', 'This is not a valid number']]]]));
514+
515+
const allRecords = backend.JobGetRecords(jid, 0, 3);
516+
expect(allRecords).toMatchObject([
517+
{ Name: 'Boris', Age: 13, Egginess: 'High', Weight: 5.5 },
518+
{ Name: 'Nelly', Age: 10},
519+
{ Name: 'Dave', Age: 15}
520+
]);
521+
522+
backend.JobDelete(sid, jid);
523+
524+
backend.SessionDelete(sid);
525+
526+
}
527+
528+
});

0 commit comments

Comments
 (0)