Skip to content

Commit b2f2511

Browse files
committed
Added strict header mapping parsing
refs https://github.com/TryGhost/Toolbox/issues/430 - Previously the CSV parser had "map whatever you can and pass on unknown properties further" approach to CSV parsing. This logic has led to unwanted fields leaking through CSV imports - messy, dangerous. - The strict mapping rules act as a "validator" to the user input, only passing through the fields we expect explicitly - safer clean cut solution with no unintended side-effects.
1 parent b765ccc commit b2f2511

File tree

2 files changed

+32
-30
lines changed

2 files changed

+32
-30
lines changed

packages/members-csv/lib/parse.js

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,17 @@ const fs = require('fs-extra');
1010
* @param {Array<string>} [defaultLabels] - A list of labels to apply to every parsed member row
1111
* @returns
1212
*/
13-
module.exports = (path, headerMapping = {}, defaultLabels = []) => {
13+
module.exports = (path, headerMapping, defaultLabels = []) => {
1414
return new Promise(function (resolve, reject) {
1515
const csvFileStream = fs.createReadStream(path);
1616
const csvParserStream = papaparse.parse(papaparse.NODE_STREAM_INPUT, {
1717
header: true,
1818
transformHeader(_header) {
19-
let header = _header;
20-
if (headerMapping && Reflect.has(headerMapping, _header)) {
21-
header = headerMapping[_header];
19+
if (!headerMapping || !Reflect.has(headerMapping, _header)) {
20+
return undefined;
2221
}
2322

24-
return header;
23+
return headerMapping[_header];
2524
},
2625
transform(value, header) {
2726
if (header === 'labels') {
@@ -73,11 +72,23 @@ module.exports = (path, headerMapping = {}, defaultLabels = []) => {
7372
});
7473

7574
parsedCSVStream.on('data', (row) => {
75+
// unmapped columns end up being assigned to 'undefined' property
76+
// in the transformHeader stage, those should be removed completely
77+
if (Reflect.has(row, 'undefined')) {
78+
delete row.undefined;
79+
}
80+
81+
// skip a rows with no data
82+
if (!Object.keys(row).length){
83+
return;
84+
}
85+
7686
if (row.labels) {
7787
row.labels = row.labels.concat(defaultLabels);
7888
} else {
7989
row.labels = defaultLabels;
8090
}
91+
8192
rows.push(row);
8293
});
8394
});

packages/members-csv/test/parse.test.js

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,22 @@ const {parse} = require('../index');
55
const csvPath = path.join(__dirname, '/fixtures/');
66

77
describe('parse', function () {
8+
const DEFAULT_HEADER_MAPPING = {
9+
email: 'email',
10+
name: 'name'
11+
};
12+
813
it('empty file', async function () {
914
const filePath = csvPath + 'empty.csv';
10-
const result = await parse(filePath);
15+
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
1116

1217
should.exist(result);
1318
result.length.should.eql(0);
1419
});
1520

1621
it('one column', async function () {
1722
const filePath = csvPath + 'single-column-with-header.csv';
18-
const result = await parse(filePath);
23+
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
1924

2025
should.exist(result);
2126
result.length.should.eql(2);
@@ -33,7 +38,7 @@ describe('parse', function () {
3338

3439
it('two columns, 1 filter', async function () {
3540
const filePath = csvPath + 'two-columns-with-header.csv';
36-
const result = await parse(filePath);
41+
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
3742

3843
should.exist(result);
3944
result.length.should.eql(2);
@@ -44,6 +49,7 @@ describe('parse', function () {
4449
it('two columns, 2 filters', async function () {
4550
const filePath = csvPath + 'two-columns-obscure-header.csv';
4651
const mapping = {
52+
id: 'id',
4753
'Email Address': 'email'
4854
};
4955
const result = await parse(filePath, mapping);
@@ -59,6 +65,7 @@ describe('parse', function () {
5965
it('two columns with mapping', async function () {
6066
const filePath = csvPath + 'two-columns-mapping-header.csv';
6167
const mapping = {
68+
id: 'id',
6269
correo_electronico: 'email',
6370
nombre: 'name'
6471
};
@@ -78,40 +85,23 @@ describe('parse', function () {
7885
it('two columns with partial mapping', async function () {
7986
const filePath = csvPath + 'two-columns-mapping-header.csv';
8087
const mapping = {
88+
id: 'id',
8189
correo_electronico: 'email'
8290
};
8391
const result = await parse(filePath, mapping);
8492

8593
should.exist(result);
8694
result.length.should.eql(2);
8795
result[0].email.should.eql('[email protected]');
88-
result[0].nombre.should.eql('joe');
8996
result[0].id.should.eql('1');
9097

9198
result[1].email.should.eql('[email protected]');
92-
result[1].nombre.should.eql('test');
93-
result[1].id.should.eql('2');
94-
});
95-
96-
it('two columns with empty mapping', async function () {
97-
const filePath = csvPath + 'two-columns-mapping-header.csv';
98-
const mapping = {};
99-
const result = await parse(filePath, mapping);
100-
101-
should.exist(result);
102-
result.length.should.eql(2);
103-
result[0].correo_electronico.should.eql('[email protected]');
104-
result[0].nombre.should.eql('joe');
105-
result[0].id.should.eql('1');
106-
107-
result[1].correo_electronico.should.eql('[email protected]');
108-
result[1].nombre.should.eql('test');
10999
result[1].id.should.eql('2');
110100
});
111101

112102
it('transforms empty values to nulls', async function () {
113103
const filePath = csvPath + 'multiple-records-with-empty-values.csv';
114-
const result = await parse(filePath);
104+
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
115105

116106
should.exist(result);
117107
result.length.should.eql(2);
@@ -125,6 +115,7 @@ describe('parse', function () {
125115
it(' transforms "subscribed_to_emails" column to "subscribed" property when the mapping is passed in', async function () {
126116
const filePath = csvPath + 'subscribed-to-emails-header.csv';
127117
const mapping = {
118+
email: 'email',
128119
subscribed_to_emails: 'subscribed'
129120
};
130121
const result = await parse(filePath, mapping);
@@ -140,14 +131,14 @@ describe('parse', function () {
140131

141132
it('DOES NOT transforms "subscribed_to_emails" column to "subscribed" property when the WITHOUT mapping', async function () {
142133
const filePath = csvPath + 'subscribed-to-emails-header.csv';
143-
const result = await parse(filePath);
134+
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
144135

145136
assert.ok(result);
146137
assert.equal(result.length, 2);
147138
assert.equal(result[0].email, '[email protected]');
148-
assert.ok(result[0].subscribed_to_emails);
139+
assert.equal(result[0].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined');
149140

150141
assert.equal(result[1].email, '[email protected]');
151-
assert.equal(result[1].subscribed_to_emails, false);
142+
assert.equal(result[1].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined');
152143
});
153144
});

0 commit comments

Comments
 (0)