Skip to content

Commit 5836659

Browse files
authored
Merge pull request #1200 from david-roper/non-visisble-chars
Display error msg when a non-visible character is present in the csv
2 parents 0b46205 + e81547a commit 5836659

File tree

2 files changed

+124
-33
lines changed

2 files changed

+124
-33
lines changed

apps/web/src/utils/__tests__/upload.test.ts

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ describe('Zod3', () => {
3434
});
3535

3636
it('should parse array of objects', () => {
37-
const result = Zod3.getZodTypeName(z3.array(z3.object({ name: z3.string(), age: z3.number() })));
37+
const result = Zod3.getZodTypeName(z3.array(z3.object({ age: z3.number(), name: z3.string() })));
3838
expect(result).toMatchObject({
3939
isOptional: false,
4040
multiKeys: ['name', 'age'],
@@ -58,10 +58,10 @@ describe('Zod3', () => {
5858
describe('processInstrumentCSV', () => {
5959
const mockInstrument = {
6060
validationSchema: z3.object({
61-
score: z3.number(),
62-
notes: z3.string()
61+
notes: z3.string(),
62+
score: z3.number()
6363
})
64-
} as AnyUnilingualFormInstrument;
64+
} as unknown as AnyUnilingualFormInstrument;
6565

6666
it('should process valid CSV data', async () => {
6767
const csvContent = unparse([
@@ -74,9 +74,9 @@ describe('Zod3', () => {
7474

7575
expect(result).toHaveLength(1);
7676
expect(result[0]).toMatchObject({
77-
subjectID: 'subject1',
77+
notes: 'Good performance',
7878
score: 85,
79-
notes: 'Good performance'
79+
subjectID: 'subject1'
8080
});
8181
});
8282

@@ -89,10 +89,10 @@ describe('Zod3', () => {
8989
it('should handle optional fields', async () => {
9090
const instrumentWithOptional = {
9191
validationSchema: z3.object({
92-
required: z3.string(),
93-
optional: z3.string().optional()
92+
optional: z3.string().optional(),
93+
required: z3.string()
9494
})
95-
} as AnyUnilingualFormInstrument;
95+
} as unknown as AnyUnilingualFormInstrument;
9696

9797
const csvContent = unparse([
9898
['subjectID', 'date', 'required', 'optional'],
@@ -104,8 +104,8 @@ describe('Zod3', () => {
104104

105105
expect(result).toHaveLength(1);
106106
expect(result[0]).toMatchObject({
107-
required: 'value',
108-
optional: undefined
107+
optional: undefined,
108+
required: 'value'
109109
});
110110
});
111111

@@ -114,7 +114,7 @@ describe('Zod3', () => {
114114
validationSchema: z3.object({
115115
completed: z3.boolean()
116116
})
117-
} as AnyUnilingualFormInstrument;
117+
} as unknown as AnyUnilingualFormInstrument;
118118

119119
const csvContent = unparse([
120120
['subjectID', 'date', 'completed'],
@@ -133,7 +133,7 @@ describe('Zod3', () => {
133133
validationSchema: z3.object({
134134
tags: z3.set(z3.enum(['tag1', 'tag2', 'tag3']))
135135
})
136-
} as AnyUnilingualFormInstrument;
136+
} as unknown as AnyUnilingualFormInstrument;
137137

138138
const csvContent = unparse([
139139
['subjectID', 'date', 'tags'],
@@ -197,10 +197,10 @@ describe('Zod4', () => {
197197
describe('processInstrumentCSV', () => {
198198
const mockInstrument = {
199199
validationSchema: z4.object({
200-
score: z4.number(),
201-
feedback: z4.string()
200+
feedback: z4.string(),
201+
score: z4.number()
202202
})
203-
} as AnyUnilingualFormInstrument;
203+
} as unknown as AnyUnilingualFormInstrument;
204204

205205
it('should process valid CSV data', async () => {
206206
const csvContent = unparse([
@@ -213,9 +213,9 @@ describe('Zod4', () => {
213213

214214
expect(result).toHaveLength(1);
215215
expect(result[0]).toMatchObject({
216-
subjectID: 'subject1',
216+
feedback: 'Excellent work',
217217
score: 92,
218-
feedback: 'Excellent work'
218+
subjectID: 'subject1'
219219
});
220220
});
221221

@@ -234,7 +234,7 @@ describe('Zod4', () => {
234234
validationSchema: z4.object({
235235
eventDate: z4.date()
236236
})
237-
} as AnyUnilingualFormInstrument;
237+
} as unknown as AnyUnilingualFormInstrument;
238238

239239
const csvContent = unparse([
240240
['subjectID', 'date', 'eventDate'],
@@ -253,7 +253,7 @@ describe('Zod4', () => {
253253
validationSchema: z4.object({
254254
status: z4.enum(['pending', 'active', 'completed'])
255255
})
256-
} as AnyUnilingualFormInstrument;
256+
} as unknown as AnyUnilingualFormInstrument;
257257

258258
const csvContent = unparse([
259259
['subjectID', 'date', 'status'],
@@ -277,7 +277,7 @@ describe('Zod4', () => {
277277
})
278278
)
279279
})
280-
} as AnyUnilingualFormInstrument;
280+
} as unknown as AnyUnilingualFormInstrument;
281281

282282
const csvContent = unparse([
283283
['subjectID', 'date', 'items'],

apps/web/src/utils/upload.ts

Lines changed: 103 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,26 @@ function parseSetEntry(entry: string): Set<string> {
6363
return set;
6464
}
6565

66+
/**
 * Regex source matching a single ANSI escape sequence: an ESC (U+001B) or CSI (U+009B)
 * introducer, optional intermediate characters, optional numeric parameters separated by
 * semicolons, and a final command character.
 */
const ansiEscapeCode = '[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]';

/**
 * Members of the character class treated as "non-visible". The line feed (U+000A) is
 * deliberately left out of the leading control-character ranges. U+200B-U+200D covers the
 * zero width space, zero width non-joiner and zero width joiner, so all three are reported
 * rather than only the zero width space.
 */
const zeroWidthCharacterExceptNewline =
  '\u0000-\u0009\u000B-\u0019\u001b\u180e\u009b\u00ad\u200b-\u200d\u2028\u2029\ufeff\ufe00-\ufe0f';

/**
 * Matches either a complete ANSI escape sequence or any single non-visible character.
 * The regex is global, so it carries `lastIndex` state between calls to `exec`.
 */
const zeroWidthCharactersExceptNewline = new RegExp(
  // eslint-disable-next-line no-misleading-character-class
  '(?:' + ansiEscapeCode + ')|[' + zeroWidthCharacterExceptNewline + ']',
  'g'
);

/**
 * Looks for the first non-visible character (or ANSI escape sequence) in a CSV cell.
 *
 * @param entry - the raw cell text; `undefined` and the empty string are treated as clean
 * @returns the `exec` match for the first offending sequence (`match[0]` is the matched
 *   text, `match.index` its position), or `null` when nothing was found
 */
function nonVisibleCharChecker(entry: string | undefined): null | RegExpExecArray {
  if (!entry) {
    return null;
  }

  // The shared regex is global: rewind it so every call scans from the start of `entry`
  // instead of resuming where a previous call's match ended.
  zeroWidthCharactersExceptNewline.lastIndex = 0;
  return zeroWidthCharactersExceptNewline.exec(entry);
}
85+
6686
const ZOD_TYPE_NAMES = [
6787
'ZodNumber',
6888
'ZodString',
@@ -506,17 +526,40 @@ export namespace Zod3 {
506526
);
507527
}
508528

529+
let rowNumber = 1;
530+
531+
const regexResultSubject = nonVisibleCharChecker(dataLines[0][0]);
532+
const regexResultDate = nonVisibleCharChecker(dataLines[0][1]);
533+
534+
if (regexResultSubject !== null) {
535+
const charCode = regexResultSubject[0].charCodeAt(0).toString(16).toUpperCase().padStart(4, '0');
536+
return reject(
537+
new UploadError({
538+
en: `Subject ID at row ${rowNumber} contains non-visible character(s) (U+${charCode})`,
539+
fr: `L'ID du sujet à la ligne ${rowNumber} contient des caractères non visible(s) (U+${charCode})`
540+
})
541+
);
542+
}
543+
if (regexResultDate !== null) {
544+
const charCode = regexResultDate[0].charCodeAt(0).toString(16).toUpperCase().padStart(4, '0');
545+
return reject(
546+
new UploadError({
547+
en: `Date at row ${rowNumber} contains non-visible character(s) (U+${charCode})`,
548+
fr: `Date à la ligne ${rowNumber} contient des caractères non visible(s) (U+${charCode})`
549+
})
550+
);
551+
}
552+
509553
// Remove the sample data row if included; non-visible characters (e.g. Mongolian vowel separators) in its first two cells are rejected above rather than stripped
510554
if (
511-
dataLines[0][0]?.replace(/[\u200B-\u200D\uFEFF\u180E]/g, '').trim() === INTERNAL_HEADERS_SAMPLE_DATA[0] &&
512-
dataLines[0][1]?.replace(/[\u200B-\u200D\uFEFF\u180E]/g, '').trim() === INTERNAL_HEADERS_SAMPLE_DATA[1]
555+
dataLines[0][0]?.trim() === INTERNAL_HEADERS_SAMPLE_DATA[0] &&
556+
dataLines[0][1]?.trim() === INTERNAL_HEADERS_SAMPLE_DATA[1]
513557
) {
514558
dataLines.shift();
515559
}
516560

517561
const result: FormTypes.Data[] = [];
518562

519-
let rowNumber = 1;
520563
for (const elements of dataLines) {
521564
const jsonLine: { [key: string]: unknown } = {};
522565
for (let i = 0; i < headers.length; i++) {
@@ -525,6 +568,19 @@ export namespace Zod3 {
525568
if (rawValue === '\n') {
526569
continue;
527570
}
571+
572+
//Check for non visible char in every row, return error if present
573+
const nonVisibleChars = nonVisibleCharChecker(rawValue);
574+
if (nonVisibleChars !== null) {
575+
const charCode = nonVisibleChars[0].charCodeAt(0).toString(16).toUpperCase().padStart(4, '0');
576+
return reject(
577+
new UploadError({
578+
en: `Value at row ${rowNumber} and column '${key}' contains non-visible character(s) (U+${charCode})`,
579+
fr: `La valeur à la ligne ${rowNumber} et colonne '${key}' contient des caractère(s) non visibles (U+${charCode})`
580+
})
581+
);
582+
}
583+
528584
if (shape[key] === undefined) {
529585
return reject(
530586
new UploadError({
@@ -541,7 +597,7 @@ export namespace Zod3 {
541597
return reject(
542598
new UploadError({
543599
en: `${error.description.en} at column name: '${key}' and row number '${rowNumber}'`,
544-
fr: `${error.description.fr} au nom de colonne : '${key}' et numéro de ligne '${rowNumber}`
600+
fr: `${error.description.fr} au nom de colonne : '${key}' et numéro de ligne '${rowNumber}'`
545601
})
546602
);
547603
}
@@ -560,7 +616,8 @@ export namespace Zod3 {
560616
console.error(`Failed to parse data: ${JSON.stringify(jsonLine)}`);
561617
return reject(
562618
new UploadError({
563-
en: 'Schema parsing failed: refer to the browser console for further details'
619+
en: 'Schema parsing failed: refer to the browser console for further details',
620+
fr: `Échec de l'analyse du schéma : reportez-vous à la console du navigateur pour plus de détails`
564621
})
565622
);
566623
}
@@ -824,24 +881,58 @@ export namespace Zod4 {
824881
}
825882

826883
//remove sample data if included (account for old mongolian vowel separator templates)
884+
// Return an error if non-visible characters are found
885+
886+
let rowNumber = 1;
887+
888+
const regexResultSubject = nonVisibleCharChecker(dataLines[0][0]);
889+
const regexResultDate = nonVisibleCharChecker(dataLines[0][1]);
890+
891+
if (regexResultSubject !== null) {
892+
const charCode = regexResultSubject[0].charCodeAt(0).toString(16).toUpperCase().padStart(4, '0');
893+
return reject(
894+
new UploadError({
895+
en: `Subject ID at row ${rowNumber} contains non-visible characters (U+${charCode})`,
896+
fr: `L'ID du sujet à la ligne ${rowNumber} contient des caractères non visibles (U+${charCode})`
897+
})
898+
);
899+
}
900+
if (regexResultDate !== null) {
901+
const charCode = regexResultDate[0].charCodeAt(0).toString(16).toUpperCase().padStart(4, '0');
902+
return reject(
903+
new UploadError({
904+
en: `Date at row ${rowNumber} contains non-visible characters (U+${charCode})`,
905+
fr: `Date à la ligne ${rowNumber} contient des caractères non visibles (U+${charCode})`
906+
})
907+
);
908+
}
827909

828910
if (
829-
dataLines[0][0]?.replace(/[\u200B-\u200D\uFEFF\u180E]/g, '').trim() === INTERNAL_HEADERS_SAMPLE_DATA[0] &&
830-
dataLines[0][1]?.replace(/[\u200B-\u200D\uFEFF\u180E]/g, '').trim() === INTERNAL_HEADERS_SAMPLE_DATA[1]
911+
dataLines[0][0]?.trim() === INTERNAL_HEADERS_SAMPLE_DATA[0] &&
912+
dataLines[0][1]?.trim() === INTERNAL_HEADERS_SAMPLE_DATA[1]
831913
) {
832914
dataLines.shift();
833915
}
834916

835917
const result: FormTypes.Data[] = [];
836918

837-
let rowNumber = 1;
838919
for (const elements of dataLines) {
839920
const jsonLine: { [key: string]: unknown } = {};
840921
for (let i = 0; i < headers.length; i++) {
841922
const key = headers[i]!.trim();
842-
const rawValue = elements[i]!.trim();
843-
if (rawValue === '\n') {
844-
continue;
923+
const cell = elements[i];
924+
const rawValue = cell == null ? '' : cell.trim();
925+
if (rawValue === '\n') continue;
926+
// Return error if any non‑visible character is present
927+
const nonVisibleChars = nonVisibleCharChecker(rawValue);
928+
if (nonVisibleChars !== null) {
929+
const charCode = nonVisibleChars[0].charCodeAt(0).toString(16).toUpperCase().padStart(4, '0');
930+
return reject(
931+
new UploadError({
932+
en: `Value at row ${rowNumber} and column '${key}' contains non-visible characters (U+${charCode})`,
933+
fr: `La valeur à la ligne ${rowNumber} et colonne '${key}' contient des caractères non visibles (U+${charCode})`
934+
})
935+
);
845936
}
846937
if (shape[key] === undefined) {
847938
return reject(
@@ -859,7 +950,7 @@ export namespace Zod4 {
859950
return reject(
860951
new UploadError({
861952
en: `${error.description.en} at column name: '${key}' and row number '${rowNumber}'`,
862-
fr: `${error.description.fr} au nom de colonne : '${key}' et numéro de ligne '${rowNumber}`
953+
fr: `${error.description.fr} au nom de colonne : '${key}' et numéro de ligne '${rowNumber}'`
863954
})
864955
);
865956
}

0 commit comments

Comments
 (0)