add more tests and handle edge cases

Casheeew · Casheeew · commit fdf7851b2bd2 · 2026-02-26T11:57:12.000+09:00
diff --git a/ext/js/dictionary/dictionary-importer.js b/ext/js/dictionary/dictionary-importer.js
@@ -961,6 +961,7 @@ export class DictionaryImporter {
         let entryStart = -1;
         let accumulated = '';
         let hasTopLevelArray = false;
+        let needsComma = false;
 
         for (;;) {
             const {done, value} = await reader.read();
@@ -1001,6 +1002,9 @@ export class DictionaryImporter {
                         if (depth === 1) {
                             hasTopLevelArray = true;
                         } else if (depth === 2) {
+                            if (needsComma) {
+                                throw new Error(`Dictionary has invalid data in '${file.filename}'`);
+                            }
                             entryStart = i;
                             accumulated = '';
                         }
@@ -1026,6 +1030,7 @@ export class DictionaryImporter {
                             }
                             await onEntry(parsed);
                             entryStart = -1;
+                            needsComma = true;
                         }
                         break;
                     case 0x7D: // }
@@ -1035,9 +1040,16 @@ export class DictionaryImporter {
                         depth--;
                         break;
                     default:
-                        // At depth 1, only whitespace and commas are valid between entries
-                        if (depth === 1 && ch !== 0x2C && ch !== 0x20 && ch !== 0x09 && ch !== 0x0A && ch !== 0x0D) {
-                            throw new Error(`Dictionary has invalid data in '${file.filename}'`);
+                        // At depth 1 (between entries), only whitespace (0x20 space, 0x09 tab, 0x0A LF, 0x0D CR) and one separating comma (0x2C) directly after a completed entry are valid
+                        if (depth === 1) {
+                            if (ch === 0x2C) {
+                                if (!needsComma) {
+                                    throw new Error(`Dictionary has invalid data in '${file.filename}'`);
+                                }
+                                needsComma = false;
+                            } else if (ch !== 0x20 && ch !== 0x09 && ch !== 0x0A && ch !== 0x0D) {
+                                throw new Error(`Dictionary has invalid data in '${file.filename}'`);
+                            }
                         }
                         break;
                 }
diff --git a/test/database.test.js b/test/database.test.js
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
+import {BlobWriter, TextReader, ZipWriter} from '@zip.js/zip.js';
 import {IDBFactory, IDBKeyRange} from 'fake-indexeddb';
 import {readFileSync} from 'node:fs';
 import {fileURLToPath} from 'node:url';
@@ -43,6 +44,22 @@ async function createTestDictionaryArchiveData(dictionary, dictionaryName) {
     return await createDictionaryArchiveData(dictionaryDirectory, dictionaryName);
 }
 
+/**
+ * Builds a zip archive whose entries hold the given strings verbatim, bypassing JSON parse/re-serialize,
+ * so that tests can use intentionally malformed JSON that parseJson would reject.
+ * @param {Record<string, string>} files Map of archive filename to raw string content; one entry is added per key
+ * @returns {Promise<ArrayBuffer>} The bytes of the finished zip archive
+ */
+async function createRawDictionaryArchiveData(files) {
+    const zipFileWriter = new BlobWriter();
+    const zipWriter = new ZipWriter(zipFileWriter, {level: 0}); // level 0: zip.js applies no compression
+    for (const [fileName, content] of Object.entries(files)) {
+        await zipWriter.add(fileName, new TextReader(content)); // awaited one at a time, in Object.entries order
+    }
+    const blob = await zipWriter.close(); // close() resolves with the data collected by zipFileWriter
+    return await blob.arrayBuffer();
+}
+
 /**
  * @param {import('vitest').ExpectStatic} expect
  * @param {import('dictionary-importer').OnProgressCallback} [onProgress]
@@ -179,6 +196,41 @@ describe('Database', () => {
             });
         });
     });
+    describe('Invalid raw dictionaries', () => { // term banks are supplied as raw text and each import is expected to reject
+        const indexJson = JSON.stringify({title: 'Raw Test', format: 3, revision: 'test', sequenced: true}); // index.json text shared by every case
+        const validEntry = '["打","だ","n","n",1,["definition"],1,""]'; // one term-bank row, reused to build the malformed banks below
+        const rawInvalidDictionaries = [ // each termBank string becomes the raw content of term_bank_1.json
+            {name: 'missing comma between entries', termBank: `[${validEntry}${validEntry}]`},
+            {name: 'leading comma', termBank: `[,${validEntry}]`},
+            {name: 'double comma', termBank: `[${validEntry},,${validEntry}]`},
+            {name: 'trailing garbage after array', termBank: `[${validEntry}]garbage`},
+            {name: 'leading garbage before array', termBank: `garbage[${validEntry}]`},
+            {name: 'concatenated arrays', termBank: `[${validEntry}][${validEntry}]`},
+            {name: 'empty file', termBank: ''},
+            {name: 'whitespace only', termBank: '   '},
+            {name: 'just a number', termBank: '123'},
+            {name: 'just a string', termBank: '"hello"'},
+            {name: 'just null', termBank: 'null'},
+            {name: 'unclosed array', termBank: `[${validEntry}`},
+            {name: 'unclosed entry', termBank: '[["a","b"'},
+        ];
+        describe.each(rawInvalidDictionaries)('Raw invalid: $name', ({termBank}) => { // $name is filled in from each case's name field
+            test('Has invalid data', async ({expect}) => {
+                const dictionaryDatabase = new DictionaryDatabase();
+                await dictionaryDatabase.prepare();
+
+                const testDictionarySource = await createRawDictionaryArchiveData({ // archive holds exactly these two files
+                    'index.json': indexJson,
+                    'term_bank_1.json': termBank,
+                });
+
+                /** @type {import('dictionary-importer').ImportDetails} */
+                const importDetails = {prefixWildcardsSupported: false, yomitanVersion: '0.0.0.0'};
+                await expect.soft(createDictionaryImporter(expect).importDictionary(dictionaryDatabase, testDictionarySource, importDetails)).rejects.toThrow('Dictionary has invalid data');
+                await dictionaryDatabase.close(); // still reached when the soft expectation above fails, since soft failures are recorded rather than thrown
+            });
+        });
+    });
     describe('Database valid usage', () => {
         const testDataFilePath = join(dirname, 'data/database-test-cases.json');
         /** @type {import('test/database').DatabaseTestData} */