diff --git a/changelog.d/20250915_144507_markiewicz_cache_json.md b/changelog.d/20250915_144507_markiewicz_cache_json.md new file mode 100644 index 00000000..ffeb6de5 --- /dev/null +++ b/changelog.d/20250915_144507_markiewicz_cache_json.md @@ -0,0 +1,47 @@ + + + +### Changed + +- Parsed JSON files are now cached to reduce I/O and parsing costs. + + + + + + diff --git a/src/files/json.test.ts b/src/files/json.test.ts index 4a71403c..f9cf7566 100644 --- a/src/files/json.test.ts +++ b/src/files/json.test.ts @@ -3,6 +3,7 @@ import type { BIDSFile } from '../types/filetree.ts' import type { FileIgnoreRules } from './ignore.ts' import { testAsyncFileAccess } from './access.test.ts' +import { pathsToTree } from '../files/filetree.ts' import { loadJSON } from './json.ts' function encodeUTF16(text: string) { @@ -18,9 +19,12 @@ function encodeUTF16(text: string) { return buffer } -function makeFile(text: string, encoding: string): BIDSFile { +function makeFile(path: string, text: string, encoding: string): BIDSFile { const bytes = encoding === 'utf-8' ? 
new TextEncoder().encode(text) : encodeUTF16(text) + const file = pathsToTree([path]).get(path) as BIDSFile return { + path: file.path, + parent: file.parent, readBytes: async (size: number) => { return new Uint8Array(bytes) }, @@ -30,13 +34,13 @@ function makeFile(text: string, encoding: string): BIDSFile { Deno.test('Test JSON error conditions', async (t) => { await t.step('Load valid JSON', async () => { - const JSONfile = makeFile('{"a": 1}', 'utf-8') + const JSONfile = makeFile('/valid-contents.json', '{"a": 1}', 'utf-8') const result = await loadJSON(JSONfile) assertObjectMatch(result, { a: 1 }) }) await t.step('Error on BOM', async () => { - const BOMfile = makeFile('\uFEFF{"a": 1}', 'utf-8') + const BOMfile = makeFile('/BOM.json', '\uFEFF{"a": 1}', 'utf-8') let error: any = undefined await loadJSON(BOMfile).catch((e) => { error = e @@ -45,7 +49,7 @@ Deno.test('Test JSON error conditions', async (t) => { }) await t.step('Error on UTF-16', async () => { - const UTF16file = makeFile('{"a": 1}', 'utf-16') + const UTF16file = makeFile('/utf16.json', '{"a": 1}', 'utf-16') let error: any = undefined await loadJSON(UTF16file).catch((e) => { error = e @@ -54,13 +58,14 @@ Deno.test('Test JSON error conditions', async (t) => { }) await t.step('Error on invalid JSON syntax', async () => { - const badJSON = makeFile('{"a": 1]', 'utf-8') + const badJSON = makeFile('/bad-syntax.json', '{"a": 1]', 'utf-8') let error: any = undefined await loadJSON(badJSON).catch((e) => { error = e }) assertObjectMatch(error, { code: 'JSON_INVALID' }) }) + loadJSON.cache.clear() }) testAsyncFileAccess('Test file access errors for loadJSON', loadJSON) diff --git a/src/files/json.ts b/src/files/json.ts index a2d79fb0..a9686386 100644 --- a/src/files/json.ts +++ b/src/files/json.ts @@ -1,3 +1,4 @@ +import { filememoizeAsync } from '../utils/memoize.ts' import type { BIDSFile } from '../types/filetree.ts' import { readBytes } from './access.ts' @@ -21,7 +22,7 @@ async function 
readJSONText(file: BIDSFile): Promise<string> { } } -export async function loadJSON(file: BIDSFile): Promise<Record<string, unknown>> { +async function _loadJSON(file: BIDSFile): Promise<Record<string, unknown>> { const text = await readJSONText(file) // Raise encoding errors let parsedText try { @@ -37,3 +38,5 @@ export async function loadJSON(file: BIDSFile): Promise<Record<string, unknown>> } return parsedText } + +export const loadJSON = filememoizeAsync(_loadJSON) diff --git a/src/schema/walk.ts b/src/schema/walk.ts index c9d6157f..d8023723 100644 --- a/src/schema/walk.ts +++ b/src/schema/walk.ts @@ -2,6 +2,7 @@ import { BIDSContext, type BIDSContextDataset } from './context.ts' import type { BIDSFile, FileTree } from '../types/filetree.ts' import type { DatasetIssues } from '../issues/datasetIssues.ts' import { loadTSV } from '../files/tsv.ts' +import { loadJSON } from '../files/json.ts' function* quickWalk(dir: FileTree): Generator<BIDSFile> { for (const file of dir.files) { @@ -49,6 +50,7 @@ async function* _walkFileTree( } } loadTSV.cache.delete(fileTree.path) + loadJSON.cache.delete(fileTree.path) } /** Walk all files in the dataset and construct a context for each one */