Skip to content

Commit fccff7e

Browse files
Jason3S and Copilot authored
fix: Add convertToBtrie method to trie-lib (#8562)
Signed-off-by: Jason Dent <Jason3S@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent a552b17 commit fccff7e

File tree

13 files changed

+353
-66
lines changed

13 files changed

+353
-66
lines changed

packages/cspell-dictionary-bundler-plugin/src/core/bunder.test.ts

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import fs from 'node:fs/promises';
2+
import { fileURLToPath } from 'node:url';
23

34
import type { CSpellVFS } from '@cspell/cspell-types';
5+
import type { ICSpellConfigFile } from 'cspell-config-lib';
46
import { describe, expect, test } from 'vitest';
57

6-
import { makeVfsUrl, populateVfs } from './bundler.ts';
8+
import { makeVfsUrl, populateVfs, resolveDictionaries } from './bundler.ts';
79

810
const fixturesUrl = new URL('../../tests/fixtures/file.txt', import.meta.url);
911

@@ -25,11 +27,43 @@ describe('populateVfs', () => {
2527
test('should populate the vfs with the content of the file', async () => {
2628
const vfs: CSpellVFS = {};
2729
const fileUrl = new URL('words.txt', fixturesUrl);
28-
const url = await populateVfs(vfs, fileUrl);
30+
const url = await populateVfs(vfs, { url: fileUrl });
2931
expect(url.href).toMatch(/^cspell-vfs:\/\/\//);
3032
expect(vfs[url.href]).toBeDefined();
3133
expect(vfs[url.href].encoding).toBe('base64');
3234
const content = Buffer.from(vfs[url.href].data as string, 'base64').toString();
3335
expect(content).toBe(await fs.readFile(fileUrl, 'utf8'));
3436
});
3537
});
38+
39+
describe('resolveDictionaries', () => {
40+
test('should resolve dictionary definitions and populate the vfs', async () => {
41+
const url = new URL('test/config.json', import.meta.url);
42+
const config: ICSpellConfigFile = {
43+
url,
44+
settings: {
45+
dictionaryDefinitions: [{ name: 'test-dict', path: fileURLToPath(new URL('words.txt', fixturesUrl)) }],
46+
},
47+
};
48+
49+
const settings = await resolveDictionaries(config, {
50+
convertToBTrie: true,
51+
minConvertSize: 0,
52+
compress: true,
53+
debug: false,
54+
});
55+
56+
expect(settings.dictionaryDefinitions).toBeDefined();
57+
expect(settings.dictionaryDefinitions?.[0].name).toBe('test-dict');
58+
expect(settings.dictionaryDefinitions?.[0].file).toBeUndefined();
59+
expect(settings.dictionaryDefinitions?.[0].btrie).toBeUndefined();
60+
expect(settings.dictionaryDefinitions?.[0].path).toEqual(expect.stringContaining('cspell-vfs:///'));
61+
62+
const vfsPath = settings.dictionaryDefinitions?.[0].path as string;
63+
const vfsUrl = new URL(vfsPath);
64+
expect(vfsUrl.protocol).toBe('cspell-vfs:');
65+
expect(vfsUrl.pathname).toEqual(expect.stringMatching(/\.btrie.gz$/));
66+
67+
expect(settings.vfs?.[vfsPath]).toBeDefined();
68+
});
69+
});

packages/cspell-dictionary-bundler-plugin/src/core/bundler.ts

Lines changed: 93 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
import { createHash } from 'node:crypto';
22
import fs from 'node:fs/promises';
33
import { createRequire } from 'node:module';
4-
import { pathToFileURL } from 'node:url';
4+
import nodePath from 'node:path';
5+
import { fileURLToPath, pathToFileURL } from 'node:url';
6+
import { gzipSync } from 'node:zlib';
57

68
import type { CSpellSettings, CSpellVFS } from '@cspell/cspell-types';
79
import { mergeConfig } from '@cspell/cspell-types';
810
import type { CSpellConfigFile, CSpellConfigFileReaderWriter, ICSpellConfigFile } from 'cspell-config-lib';
11+
import { convertToBTrie } from 'cspell-trie-lib';
912

10-
export interface CSpellDictionaryBundlerOptions {
11-
debug?: boolean;
12-
}
13+
import type { Options } from './options.ts';
14+
15+
export type CSpellDictionaryBundlerOptions = Required<
16+
Pick<Options, 'debug' | 'convertToBTrie' | 'minConvertSize' | 'compress'>
17+
>;
1318

1419
export class CSpellDictionaryBundler {
1520
#loadedConfigs = new Map<string, Promise<ICSpellConfigFile>>();
@@ -43,7 +48,7 @@ export class CSpellDictionaryBundler {
4348
const imports = await this.loadImports(config);
4449
const settings = mergeConfig(
4550
imports.map((f) => f.settings),
46-
await this.resolveDictionaries(config),
51+
await resolveDictionaries(config, this.#options),
4752
);
4853
delete settings.import;
4954
delete settings['$schema'];
@@ -53,29 +58,6 @@ export class CSpellDictionaryBundler {
5358
};
5459
}
5560

56-
async resolveDictionaries(config: ICSpellConfigFile): Promise<CSpellSettings> {
57-
const settings = { ...config.settings };
58-
if (!settings.dictionaryDefinitions) return settings;
59-
// Make a copy of the dictionary definitions and vfs to avoid mutating the original config file.
60-
const dictDefs = (settings.dictionaryDefinitions = [...settings.dictionaryDefinitions]);
61-
const vfs: CSpellVFS = (settings.vfs ??= Object.create(null));
62-
63-
for (let i = 0; i < dictDefs.length; ++i) {
64-
const def = dictDefs[i];
65-
if (!def.path) continue;
66-
const d = { ...def };
67-
dictDefs[i] = d;
68-
const url = new URL(def.btrie ?? def.path, config.url);
69-
if (url.protocol !== 'file:') continue;
70-
const vfsUrl = await populateVfs(vfs, url);
71-
delete d.file;
72-
delete d.btrie;
73-
d.path = vfsUrl.href;
74-
}
75-
76-
return settings;
77-
}
78-
7961
importConfig(url: URL, content?: string): Promise<CSpellConfigFile> {
8062
if (content && !isCodeFile(url)) {
8163
return Promise.resolve(this.reader.parse({ url, content }));
@@ -89,19 +71,64 @@ export class CSpellDictionaryBundler {
8971
}
9072
}
9173

74+
export async function resolveDictionaries(
75+
config: ICSpellConfigFile,
76+
options: CSpellDictionaryBundlerOptions,
77+
): Promise<CSpellSettings> {
78+
const settings = { ...config.settings };
79+
if (!settings.dictionaryDefinitions) return settings;
80+
if (config.url.protocol !== 'file:') return settings;
81+
// Make a copy of the dictionary definitions and vfs to avoid mutating the original config file.
82+
const dictDefs = (settings.dictionaryDefinitions = [...settings.dictionaryDefinitions]);
83+
const vfs: CSpellVFS = (settings.vfs ??= Object.create(null));
84+
const minConvertSize = options.minConvertSize ?? 1024;
85+
86+
for (let i = 0; i < dictDefs.length; ++i) {
87+
const def = dictDefs[i];
88+
const d = { ...def };
89+
if (!d.path) continue;
90+
dictDefs[i] = d;
91+
const url = resolvePath(d.btrie ?? d.path, config.url);
92+
if (url.protocol !== 'file:') continue;
93+
let file = await readFile({ url });
94+
file = options.convertToBTrie && fileLength(file) >= minConvertSize ? await convert(file) : file;
95+
file = options.compress && fileLength(file) >= minConvertSize ? compressFile(file) : file;
96+
const vfsUrl = await populateVfs(vfs, file);
97+
delete d.file;
98+
delete d.btrie;
99+
d.path = vfsUrl.href;
100+
}
101+
102+
return settings;
103+
}
104+
105+
interface FileReference {
106+
url: URL;
107+
content?: string | Uint8Array<ArrayBuffer>;
108+
}
109+
110+
interface FileResource extends FileReference {
111+
content: string | Uint8Array<ArrayBuffer>;
112+
}
113+
114+
async function convert(file: FileReference): Promise<FileResource> {
115+
const resource = await readFile(file);
116+
return convertToBTrie(resource, { optimize: true });
117+
}
118+
92119
/**
93120
* Load a file from the file system and populate the virtual file system with its content.
94121
*
95122
* @param vfs - The virtual file system data to add the file to.
96123
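* @param fileRef - The file to store: its url and, optionally, its already loaded content. The file is read from the url only when no content is given.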
97124
* @return The cspell-vfs url that was loaded.
98125
*/
99-
export async function populateVfs(vfs: CSpellVFS, url: URL): Promise<URL> {
100-
const content = await fs.readFile(url);
126+
export async function populateVfs(vfs: CSpellVFS, fileRef: FileReference): Promise<URL> {
127+
const { url, content } = await readFile(fileRef);
101128

102129
const hash = createHash('sha256').update(content).digest('hex');
103130

104-
const data = content.toString('base64');
131+
const data = typeof content === 'string' ? content : Buffer.from(content).toString('base64');
105132
const vfsUrl = makeVfsUrl(url, hash.slice(0, 16));
106133
vfs[vfsUrl.href] = {
107134
data,
@@ -150,3 +177,38 @@ const isCodeFileRegExp = /\.[cm]?(js|ts)$/i;
150177
function isCodeFile(url: URL): boolean {
151178
return isCodeFileRegExp.test(url.pathname);
152179
}
180+
181+
async function readFile(fileRef: FileReference): Promise<FileResource> {
182+
const url = fileRef.url;
183+
const content = fileRef.content ?? (await fs.readFile(url));
184+
return { url, content };
185+
}
186+
187+
/**
188+
* The approximate size of the file content in bytes. String content is measured by its string length, not by its encoded byte length.
189+
* @param file - The file resource whose content is measured.
190+
* @returns The string length for string content, otherwise the byte length of the content.
191+
*/
192+
function fileLength(file: FileResource): number {
193+
if (typeof file.content === 'string') {
194+
return file.content.length;
195+
}
196+
return file.content.byteLength;
197+
}
198+
199+
function compressFile(file: FileResource): FileResource {
200+
if (file.url.pathname.endsWith('.gz')) return file;
201+
const url = new URL(file.url.pathname + '.gz', file.url);
202+
const content = gzipSync(file.content);
203+
return { url, content };
204+
}
205+
206+
function resolvePath(path: string, base: URL): URL {
207+
if (isUrlLike(path)) {
208+
return new URL(path);
209+
}
210+
211+
const dir = fileURLToPath(new URL('./', base));
212+
const filePath = nodePath.resolve(dir, path);
213+
return pathToFileURL(filePath);
214+
}

packages/cspell-dictionary-bundler-plugin/src/core/core.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ export function resolveOptions(options: Options): OptionsResolved {
9999
include: options.include || [/.*cspell(?:[-]ext)?(\..*)?\.(?:jsonc?|ya?ml|toml)$/i],
100100
exclude: options.exclude || undefined,
101101
enforce: 'enforce' in options ? options.enforce : 'pre',
102+
convertToBTrie: options.convertToBTrie ?? true,
103+
minConvertSize: options.minConvertSize ?? 200,
104+
compress: options.compress ?? false,
102105
debug: !!options.debug,
103106
};
104107
}

packages/cspell-dictionary-bundler-plugin/src/core/options.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,22 @@ export interface Options {
44
include?: FilterPattern | undefined;
55
exclude?: FilterPattern | undefined;
66
enforce?: 'pre' | 'post' | undefined;
7+
/**
8+
* Convert the dictionary to a BTrie format.
9+
* This will increase the size of the output file, but will significantly reduce the time it takes to load the dictionary.
10+
* @default true
11+
*/
12+
convertToBTrie?: boolean | undefined;
13+
/**
14+
* The minimum size (in bytes) a dictionary must be to be converted to BTrie format.
15+
* @default 200
16+
*/
17+
minConvertSize?: number | undefined;
18+
19+
/**
20+
* Compress the inline data using gzip (off by default).
21+
*/
22+
compress?: boolean | undefined;
23+
724
debug?: boolean;
825
}

packages/cspell-dictionary-bundler-plugin/tests/__snapshots__/rollup.test.ts.snap

Lines changed: 3 additions & 3 deletions
Large diffs are not rendered by default.

packages/cspell-trie-lib/api/api.d.ts

Lines changed: 39 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/cspell-trie-lib/src/__snapshots__/index.test.ts.snap

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,20 @@ exports[`Validate index.ts > Track changes to the API. 1`] = `
1616
"buildTrie",
1717
"buildTrieFast",
1818
"consolidate",
19+
"convertToBTrie",
1920
"countNodes",
2021
"countWords",
2122
"createDictionaryLineParser",
2223
"createTrieRoot",
2324
"createTrieRootFromList",
2425
"createWeightedMap",
26+
"decodeFile",
2527
"decodeTrie",
2628
"defaultTrieInfo",
2729
"defaultTrieOptions",
2830
"editDistance",
2931
"editDistanceWeighted",
32+
"encodeITrieToBTrie",
3033
"encodeTrieDataToBTrie",
3134
"expandCharacterSet",
3235
"findNode",

packages/cspell-trie-lib/src/lib/TrieBlob/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ export {
66
} from './createTrieBlob.ts';
77
export { TrieBlob } from './TrieBlob.ts';
88
export { isBTrieData } from './TrieBlobEncoder.ts';
9-
export { decodeBTrie, encodeTrieDataToBTrie } from './trieDataEncoder.ts';
9+
export { decodeBTrie, encodeITrieToBTrie, encodeTrieDataToBTrie } from './trieDataEncoder.ts';

packages/cspell-trie-lib/src/lib/TrieBlob/trieDataEncoder.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
import type { BuildOptions } from '../BuildOptions.ts';
2+
import type { ITrie } from '../ITrie.ts';
23
import type { TrieData } from '../TrieData.ts';
34
import { TrieBlob } from './TrieBlob.ts';
45
import { TrieBlobBuilder } from './TrieBlobBuilder.ts';
56

7+
export function encodeITrieToBTrie(trie: ITrie, buildOptions?: BuildOptions): Uint8Array<ArrayBuffer> {
8+
return encodeTrieDataToBTrie(trie.data, buildOptions);
9+
}
10+
611
export function encodeTrieDataToBTrie(data: TrieData, buildOptions?: BuildOptions): Uint8Array<ArrayBuffer> {
712
const needToBuild = buildOptions?.optimize || buildOptions?.useStringTable;
813
if (!needToBuild && data.encodeToBTrie) {

0 commit comments

Comments
 (0)