Skip to content

Commit fdf7851

Browse files
committed
add more tests and handle edge cases
1 parent 54acb19 commit fdf7851

File tree

2 files changed

+67
-3
lines changed

2 files changed

+67
-3
lines changed

ext/js/dictionary/dictionary-importer.js

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,7 @@ export class DictionaryImporter {
961961
let entryStart = -1;
962962
let accumulated = '';
963963
let hasTopLevelArray = false;
964+
let needsComma = false;
964965

965966
for (;;) {
966967
const {done, value} = await reader.read();
@@ -1001,6 +1002,9 @@ export class DictionaryImporter {
10011002
if (depth === 1) {
10021003
hasTopLevelArray = true;
10031004
} else if (depth === 2) {
1005+
if (needsComma) {
1006+
throw new Error(`Dictionary has invalid data in '${file.filename}'`);
1007+
}
10041008
entryStart = i;
10051009
accumulated = '';
10061010
}
@@ -1026,6 +1030,7 @@ export class DictionaryImporter {
10261030
}
10271031
await onEntry(parsed);
10281032
entryStart = -1;
1033+
needsComma = true;
10291034
}
10301035
break;
10311036
case 0x7D: // }
@@ -1035,9 +1040,16 @@ export class DictionaryImporter {
10351040
depth--;
10361041
break;
10371042
default:
1038-
// At depth 1, only whitespace and commas are valid between entries
1039-
if (depth === 1 && ch !== 0x2C && ch !== 0x20 && ch !== 0x09 && ch !== 0x0A && ch !== 0x0D) {
1040-
throw new Error(`Dictionary has invalid data in '${file.filename}'`);
1043+
// At depth 1, only whitespace (0x20 space, 0x09 tab, 0x0A LF, 0x0D CR) and commas are valid between entries
1044+
if (depth === 1) {
1045+
if (ch === 0x2C) {
1046+
if (!needsComma) {
1047+
throw new Error(`Dictionary has invalid data in '${file.filename}'`);
1048+
}
1049+
needsComma = false;
1050+
} else if (ch !== 0x20 && ch !== 0x09 && ch !== 0x0A && ch !== 0x0D) {
1051+
throw new Error(`Dictionary has invalid data in '${file.filename}'`);
1052+
}
10411053
}
10421054
break;
10431055
}

test/database.test.js

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* along with this program. If not, see <https://www.gnu.org/licenses/>.
1717
*/
1818

19+
import {BlobWriter, TextReader, ZipWriter} from '@zip.js/zip.js';
1920
import {IDBFactory, IDBKeyRange} from 'fake-indexeddb';
2021
import {readFileSync} from 'node:fs';
2122
import {fileURLToPath} from 'node:url';
@@ -43,6 +44,22 @@ async function createTestDictionaryArchiveData(dictionary, dictionaryName) {
4344
return await createDictionaryArchiveData(dictionaryDirectory, dictionaryName);
4445
}
4546

47+
/**
48+
* Creates a dictionary zip archive with raw file contents, bypassing JSON parse/re-serialize.
49+
* This allows testing with intentionally malformed JSON that parseJson would reject.
50+
* @param {Record<string, string>} files Map of filename to raw string content
51+
* @returns {Promise<ArrayBuffer>}
52+
*/
53+
async function createRawDictionaryArchiveData(files) {
54+
const zipFileWriter = new BlobWriter();
55+
const zipWriter = new ZipWriter(zipFileWriter, {level: 0});
56+
for (const [fileName, content] of Object.entries(files)) {
57+
await zipWriter.add(fileName, new TextReader(content));
58+
}
59+
const blob = await zipWriter.close();
60+
return await blob.arrayBuffer();
61+
}
62+
4663
/**
4764
* @param {import('vitest').ExpectStatic} expect
4865
* @param {import('dictionary-importer').OnProgressCallback} [onProgress]
@@ -179,6 +196,41 @@ describe('Database', () => {
179196
});
180197
});
181198
});
199+
describe('Invalid raw dictionaries', () => {
200+
const indexJson = JSON.stringify({title: 'Raw Test', format: 3, revision: 'test', sequenced: true});
201+
const validEntry = '["打","だ","n","n",1,["definition"],1,""]';
202+
const rawInvalidDictionaries = [
203+
{name: 'missing comma between entries', termBank: `[${validEntry}${validEntry}]`},
204+
{name: 'leading comma', termBank: `[,${validEntry}]`},
205+
{name: 'double comma', termBank: `[${validEntry},,${validEntry}]`},
206+
{name: 'trailing garbage after array', termBank: `[${validEntry}]garbage`},
207+
{name: 'leading garbage before array', termBank: `garbage[${validEntry}]`},
208+
{name: 'concatenated arrays', termBank: `[${validEntry}][${validEntry}]`},
209+
{name: 'empty file', termBank: ''},
210+
{name: 'whitespace only', termBank: ' '},
211+
{name: 'just a number', termBank: '123'},
212+
{name: 'just a string', termBank: '"hello"'},
213+
{name: 'just null', termBank: 'null'},
214+
{name: 'unclosed array', termBank: `[${validEntry}`},
215+
{name: 'unclosed entry', termBank: '[["a","b"'},
216+
];
217+
describe.each(rawInvalidDictionaries)('Raw invalid: $name', ({termBank}) => {
218+
test('Has invalid data', async ({expect}) => {
219+
const dictionaryDatabase = new DictionaryDatabase();
220+
await dictionaryDatabase.prepare();
221+
222+
const testDictionarySource = await createRawDictionaryArchiveData({
223+
'index.json': indexJson,
224+
'term_bank_1.json': termBank,
225+
});
226+
227+
/** @type {import('dictionary-importer').ImportDetails} */
228+
const importDetails = {prefixWildcardsSupported: false, yomitanVersion: '0.0.0.0'};
229+
await expect.soft(createDictionaryImporter(expect).importDictionary(dictionaryDatabase, testDictionarySource, importDetails)).rejects.toThrow('Dictionary has invalid data');
230+
await dictionaryDatabase.close();
231+
});
232+
});
233+
});
182234
describe('Database valid usage', () => {
183235
const testDataFilePath = join(dirname, 'data/database-test-cases.json');
184236
/** @type {import('test/database').DatabaseTestData} */

0 commit comments

Comments
 (0)