diff --git a/packages/cspell-lib/api/api.d.ts b/packages/cspell-lib/api/api.d.ts index 2ea90bfdac7d..193db3c06441 100644 --- a/packages/cspell-lib/api/api.d.ts +++ b/packages/cspell-lib/api/api.d.ts @@ -2470,6 +2470,7 @@ declare function getFileTypesForExt(ext: string): FileTypeId[]; declare function findMatchingFileTypes(filename: string): FileTypeId[]; //#endregion //#region ../cspell-trie-lib/dist/index.d.ts +//#endregion //#region src/lib/distance/weightedMaps.d.ts /** diff --git a/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap b/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap index 31eb2e16e66f..75efa4ceb149 100644 --- a/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap +++ b/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap @@ -16,6 +16,10 @@ exports[`docValidator suggestions > suggestions 1`] = ` "word": "orangs", "wordAdjustedToMatchCase": "Orangs", }, + { + "word": "orangey", + "wordAdjustedToMatchCase": "Orangey", + }, { "word": "orange's", }, @@ -26,9 +30,5 @@ exports[`docValidator suggestions > suggestions 1`] = ` "word": "ranges", "wordAdjustedToMatchCase": "Ranges", }, - { - "word": "orangier", - "wordAdjustedToMatchCase": "Orangier", - }, ] `; diff --git a/packages/cspell-tools/src/__snapshots__/build.test.ts.snap b/packages/cspell-tools/src/__snapshots__/build.test.ts.snap index ecd86223a088..405455d3a1f9 100644 Binary files a/packages/cspell-tools/src/__snapshots__/build.test.ts.snap and b/packages/cspell-tools/src/__snapshots__/build.test.ts.snap differ diff --git a/packages/cspell-tools/src/app.ts b/packages/cspell-tools/src/app.ts index 496abd8bd47b..28789780b57b 100644 --- a/packages/cspell-tools/src/app.ts +++ b/packages/cspell-tools/src/app.ts @@ -122,6 +122,8 @@ export async function run(program: Command, argv: string[], flags?: FeatureFlags .command('btrie [files...]') .description('Generate BTrie files 
from word list files.') .option('-n, --no-compress', 'By default the files are GZipped, this will turn off GZ compression.') + .option('--no-optimize', 'Do not try to optimize.') + .option('--no-use-string-table', 'Do not use a string table in the BTrie.') .action(generateBTrie); program diff --git a/packages/cspell-tools/src/bTrie.ts b/packages/cspell-tools/src/bTrie.ts index 18411fdbad0e..47981ca0c8c6 100644 --- a/packages/cspell-tools/src/bTrie.ts +++ b/packages/cspell-tools/src/bTrie.ts @@ -4,12 +4,9 @@ import zlib from 'node:zlib'; const gzip = promisify(zlib.gzip); +import type { GenerateBTrieOptions } from './compiler/bTrie.ts'; import { createBTrieFromFile } from './compiler/bTrie.ts'; -interface GenerateBTrieOptions { - compress?: boolean; -} - export function generateBTrie(files: string[], options: GenerateBTrieOptions): Promise { return generateBTrieFromFiles(files, options); } @@ -19,7 +16,7 @@ async function generateBTrieFromFiles(files: string[], options: GenerateBTrieOpt console.log(`Generating BTrie for ${files.length} file(s).`); for (const file of files) { console.log(`Processing file: ${file}`); - const btrie = await createBTrieFromFile(file); + const btrie = await createBTrieFromFile(file, options); let outFile = bTrieFileName(file); if (compress) { const gzipped = await gzip(btrie); diff --git a/packages/cspell-tools/src/build.test.ts b/packages/cspell-tools/src/build.test.ts index b983c9cb9497..e7e8ad79be87 100644 --- a/packages/cspell-tools/src/build.test.ts +++ b/packages/cspell-tools/src/build.test.ts @@ -2,9 +2,10 @@ import { beforeEach, describe, expect, test } from 'vitest'; import { build } from './build.ts'; import { setLogger } from './compiler/index.ts'; -import { readTextFile } from './compiler/readers/readTextFile.ts'; +import { readFile, readTextFile } from './compiler/readers/readTextFile.ts'; import { spyOnConsole } from './test/console.ts'; import { createTestHelper } from './test/TestHelper.ts'; +import { hexDump } from 
import { encodeTrieDataToBTrie } from 'cspell-trie-lib';

import { createReader } from './Reader.ts';

/**
 * Options for generating a BTrie file.
 */
export interface GenerateBTrieOptions {
    /** GZip the generated file. Consumed by the caller that writes the file; not used by the encoder call below. */
    compress?: boolean;
    /** Forwarded to `encodeTrieDataToBTrie` as the `optimize` build option. */
    optimize?: boolean;
    /** Forwarded to `encodeTrieDataToBTrie` as the `useStringTable` build option. */
    useStringTable?: boolean;
}

/**
 * Read a word list file and encode its trie as BTrie bytes.
 *
 * @param file - path of the file to read; handed to `createReader`.
 * @param buildOptions - optional build options passed through to `encodeTrieDataToBTrie`.
 *   The parameter is optional so existing single-argument callers keep working, and it mirrors
 *   the optional `buildOptions` parameter of `encodeTrieDataToBTrie`.
 * @returns the encoded BTrie bytes.
 */
export async function createBTrieFromFile(file: string, buildOptions?: GenerateBTrieOptions): Promise<Uint8Array> {
    const reader = await createReader(file, {});
    const trie = reader.toTrie();
    return encodeTrieDataToBTrie(trie.data, buildOptions);
}
b/packages/cspell-tools/src/compiler/readers/readHunspellFiles.ts @@ -31,7 +31,7 @@ export async function readHunspellFiles(filename: string, options: ReaderOptions }, toTrie: () => { if (trie) return trie; - trie = parseDictionary(lines(), { stripCaseAndAccents: false }); + trie = parseDictionary(lines(), { stripCaseAndAccents: false, optimize: true }); return trie; }, }; diff --git a/packages/cspell-tools/src/compiler/readers/readTextFile.ts b/packages/cspell-tools/src/compiler/readers/readTextFile.ts index fff511485fef..e71c5217abf6 100644 --- a/packages/cspell-tools/src/compiler/readers/readTextFile.ts +++ b/packages/cspell-tools/src/compiler/readers/readTextFile.ts @@ -1,5 +1,3 @@ -import assert from 'node:assert'; -import { Buffer } from 'node:buffer'; import { promises as fs } from 'node:fs'; import { decompress } from '../../gzip/index.ts'; @@ -7,10 +5,16 @@ import { decompress } from '../../gzip/index.ts'; const isGzFile = /\.gz$/; export function readTextFile(filename: string): Promise { + const content = readFile(filename).then((buffer) => { + return new TextDecoder('utf-8').decode(buffer); + }); + return content; +} + +export function readFile(filename: string): Promise> { const content = fs .readFile(filename) - .then(async (buffer) => (isGzFile.test(filename) ? decompress(buffer) : buffer)) - .then((buffer) => (assertIsBuffer(buffer), buffer.toString('utf8'))); + .then(async (buffer) => (isGzFile.test(filename) ? 
decompress(buffer) : buffer)); return content; } @@ -18,7 +22,3 @@ export async function readTextFileLines(filename: string): Promise { const content = await readTextFile(filename); return content.split('\n'); } - -function assertIsBuffer(value: unknown): asserts value is Buffer { - assert(Buffer.isBuffer(value)); -} diff --git a/packages/cspell-tools/src/compiler/readers/textFileReader.ts b/packages/cspell-tools/src/compiler/readers/textFileReader.ts index cb945429b1d3..42627fd53e4e 100644 --- a/packages/cspell-tools/src/compiler/readers/textFileReader.ts +++ b/packages/cspell-tools/src/compiler/readers/textFileReader.ts @@ -15,7 +15,7 @@ export async function textFileReader(filename: string): Promise { lines: words, toTrie: () => { if (trie) return trie; - trie = parseDictionary(words, { stripCaseAndAccents: false }); + trie = parseDictionary(words, { stripCaseAndAccents: false, optimize: true }); return trie; }, }; diff --git a/packages/cspell-tools/src/gzip/compressFiles.ts b/packages/cspell-tools/src/gzip/compressFiles.ts index 8722832ca671..d31abdd65b50 100644 --- a/packages/cspell-tools/src/gzip/compressFiles.ts +++ b/packages/cspell-tools/src/gzip/compressFiles.ts @@ -43,10 +43,12 @@ function fixOSSystemID(zBuf: Uint8Array, os: OSFlags = OSFlags.Unix): Uint8Array return zBuf; } -export async function decompress(buf: Uint8Array | Buffer, encoding?: undefined): Promise; +type U8Array = Uint8Array; + +export async function decompress(buf: Uint8Array | Buffer, encoding?: undefined): Promise; export async function decompress(buf: Uint8Array | Buffer, encoding: 'utf8'): Promise; -export async function decompress(buf: Uint8Array | Buffer, encoding: 'utf8' | undefined): Promise; -export async function decompress(buf: Uint8Array | Buffer, encoding?: 'utf8'): Promise { +export async function decompress(buf: Uint8Array | Buffer, encoding: 'utf8' | undefined): Promise; +export async function decompress(buf: Uint8Array | Buffer, encoding?: 'utf8'): Promise { const dBuf 
/* eslint-disable unicorn/prefer-code-point */

/**
 * Format binary data as a text hex dump, one line per chunk of bytes.
 *
 * Each line is: the chunk offset as 8 hex digits, the bytes as two-digit hex values
 * (with an extra space after every 4th byte), and a printable-ASCII rendering of the bytes.
 *
 * @param buffer - the bytes to dump.
 * @param bytesPerLine - number of bytes shown per line; must be a positive integer. Defaults to 16.
 * @returns the lines joined by `\n`; an empty string for an empty buffer.
 * @throws RangeError if `bytesPerLine` is not a positive integer (a step of 0 would never terminate).
 */
export function hexDump(buffer: Uint8Array, bytesPerLine: number = 16): string {
    if (!Number.isInteger(bytesPerLine) || bytesPerLine < 1) {
        throw new RangeError(`bytesPerLine must be a positive integer, got ${bytesPerLine}`);
    }
    // Width of a full hex column plus one padding space:
    // 2 digits per byte + (n - 1) separators + one extra space per completed group of 4, + 1.
    // For the default of 16 bytes per line this is 52.
    const hexWidth = bytesPerLine * 3 + Math.floor(bytesPerLine / 4);
    const lines: string[] = [];
    for (let i = 0; i < buffer.length; i += bytesPerLine) {
        const chunk = buffer.subarray(i, i + bytesPerLine);
        lines.push(hexLine(i, chunk, hexWidth));
    }
    return lines.join('\n');
}

/**
 * Format one line of the dump.
 *
 * @param offset - offset of the first byte of `chunk`, printed as 8 hex digits.
 * @param chunk - the bytes for this line.
 * @param hexWidth - width the hex column is padded to so the ASCII column lines up on short chunks.
 */
function hexLine(offset: number, chunk: Uint8Array, hexWidth: number = 52): string {
    const bytes = [...chunk];
    // An extra space is appended after every 4th byte to visually group the hex values.
    const hex = bytes.map((b, i) => b.toString(16).padStart(2, '0') + ((i & 3) === 3 ? ' ' : '')).join(' ');
    // Only printable ASCII (32..126) is shown as-is; everything else is rendered as `.`.
    const ascii = bytes.map((b) => (b >= 32 && b <= 126 ? String.fromCharCode(b) : '.')).join('');
    return offset.toString(16).padStart(8, '0') + ' ' + hex.padEnd(hexWidth, ' ') + ' ' + ascii;
}
+ * @default false + */ + useStringTable?: boolean | undefined; +} +//#endregion //#region src/lib/distance/weightedMaps.d.ts /** @@ -171,6 +185,7 @@ interface ITrieNodeRoot extends ITrieNode { readonly forbidPrefix: string; readonly compoundFix: string; readonly caseInsensitivePrefix: string; + readonly suggestionPrefix: string; readonly hasForbiddenWords: boolean; readonly hasCompoundWords: boolean; readonly hasNonStrictWords: boolean; @@ -577,7 +592,7 @@ interface FindWordOptions { type FindWordOptionsRO = Readonly; //#endregion //#region src/lib/buildITrie.d.ts -declare function buildITrieFromWords(words: Iterable, info?: PartialTrieInfo): ITrie; +declare function buildITrieFromWords(words: Iterable, info?: PartialTrieInfo, buildOptions?: BuildOptions): ITrie; //#endregion //#region src/lib/consolidate.d.ts /** @@ -725,7 +740,7 @@ declare class Trie { } //#endregion //#region src/lib/SimpleDictionaryParser.d.ts -interface ParseDictionaryOptions { +interface ParseDictionaryOptions extends BuildOptions { compoundCharacter: string; optionalCompoundCharacter: string; forbiddenPrefix: string; @@ -791,6 +806,16 @@ interface ParseDictionaryOptions { * @default false */ makeWordsForbidden?: boolean; + /** + * Optimize the trie for size by merging duplicate sub-tries and using a String Table. + * @default false + */ + optimize?: boolean; + /** + * Use a string table to reduce memory usage. + * @default false + */ + useStringTable?: boolean; } /** * Normalizes a dictionary words based upon prefix / suffixes. 
@@ -811,7 +836,7 @@ declare function parseDictionaryLegacy(text: string | string[], options?: Partia declare function parseDictionary(text: string | Iterable, options?: Partial): ITrie; //#endregion //#region src/lib/TrieBlob/trieDataEncoder.d.ts -declare function encodeTrieDataToBTrie(data: TrieData): Uint8Array; +declare function encodeTrieDataToBTrie(data: TrieData, buildOptions?: BuildOptions): Uint8Array; //#endregion //#region src/lib/TrieBuilder.d.ts /** diff --git a/packages/cspell-trie-lib/package.json b/packages/cspell-trie-lib/package.json index d51f9fd7f945..eea002224ec9 100644 --- a/packages/cspell-trie-lib/package.json +++ b/packages/cspell-trie-lib/package.json @@ -66,6 +66,7 @@ }, "devDependencies": { "@cspell/cspell-pipe": "workspace:*", + "@cspell/dict-cpp": "^7.0.2", "@cspell/dict-en_us": "^4.4.27", "@cspell/dict-es-es": "^3.0.8", "@cspell/dict-nl-nl": "^2.4.2", diff --git a/packages/cspell-trie-lib/src/lib/BuildOptions.ts b/packages/cspell-trie-lib/src/lib/BuildOptions.ts new file mode 100644 index 000000000000..00511cec8c92 --- /dev/null +++ b/packages/cspell-trie-lib/src/lib/BuildOptions.ts @@ -0,0 +1,13 @@ +export interface BuildOptions { + /** + * Optimize the trie for size by merging duplicate sub-tries and using a String Table. + * @default false + */ + optimize?: boolean | undefined; + + /** + * Use a string table to reduce memory usage. 
+ * @default false + */ + useStringTable?: boolean | undefined; +} diff --git a/packages/cspell-trie-lib/src/lib/ITrie.test.ts b/packages/cspell-trie-lib/src/lib/ITrie.test.ts index 5524aec74361..90be0a065a58 100644 --- a/packages/cspell-trie-lib/src/lib/ITrie.test.ts +++ b/packages/cspell-trie-lib/src/lib/ITrie.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest'; import { defaultTrieInfo } from './constants.ts'; import type { ITrie } from './ITrie.ts'; -import { ITrieImpl as ITrieClass } from './ITrie.ts'; +import { ITrieImpl as ITrieClass, iTrieToStructuredStringLines } from './ITrie.ts'; import type { ITrieNode } from './ITrieNode/ITrieNode.ts'; import { parseDictionary, parseDictionaryLegacy } from './SimpleDictionaryParser.ts'; import type { SuggestionOptions } from './suggestions/genSuggestionsOptions.ts'; @@ -418,6 +418,37 @@ describe('Validate Trie Class', () => { expect(trie.find('play+time', true)?.f).toBe(1); expect(trie.find('play++time', true)?.f).toBe(1); }); + + test('iTrieToStringLines', () => { + const words = ` + # Sample Word List + !playtime + begin + beginning + end + ending + café + cafe + time + ride + hide + riding + `; + + const trie = parseDictionary(words); + const trieForceOptimize = parseDictionary(words, { optimize: true }); + const trieWithStringTable = parseDictionary(words, { optimize: true, useStringTable: true }); + + const expected = iTrieToStructuredStringLines(trie, false); + const expectedWithId = iTrieToStructuredStringLines(trie, true); + + expect(iTrieToStructuredStringLines(trieForceOptimize, false)).toEqual(expected); + // small word list are auto optimized, so we expect the same result + expect(iTrieToStructuredStringLines(trieForceOptimize, true)).toEqual(expectedWithId); + expect(iTrieToStructuredStringLines(trieWithStringTable, false)).toEqual(expected); + // Uses a string table, so we expect a different result + expect(iTrieToStructuredStringLines(trieWithStringTable, 
/**
 * One frame of the explicit stack used by `walkITrieNodes`.
 */
export interface WalkITriedNodeStackItem {
    /** current node */
    node: ITrieNode;
    /** current depth in the tree. */
    depth: number;
    /** key used to get here */
    key: string;
    /** the word up to this depth in the tree. */
    word: string;
    /** the position to process next (0 == beginning of node) */
    pos: number;
}

/**
 * A ITrie node walker.
 *
 * Nodes are yielded in pre-order: a node is yielded the first time it is reached (`pos === 0`),
 * then its children are visited in the order returned by `node.entries()`.
 *
 * Use `.next(stop: boolean)` to control the depth of the walk: passing a truthy value in response
 * to a yielded node skips that node's children.
 *
 * The yielded items are the live stack frames; the walker mutates `pos` on them as it advances.
 *
 * @param trie - the trie to walk
 * @param stack - optional stack to use, useful for resuming a walk or the path to a specific node.
 *   `stack[0]` is created from the trie root when missing.
 * @param depth - optional starting depth. `stack[depth]` must already exist.
 */
export function* walkITrieNodes(
    trie: ITrie,
    stack: WalkITriedNodeStackItem[] = [],
    depth: number = 0,
): Generator<WalkITriedNodeStackItem, void, boolean | undefined> {
    stack[0] ||= { node: trie.data.getRoot(), depth: 0, pos: 0, key: '', word: '' };

    // Children of the node currently on the stack at each depth.
    // The list is materialized once per stack frame instead of once per child step, which avoids
    // O(k^2) work for a node with k children. An entry is only reused while the frame at that depth
    // still refers to the same node object.
    const childrenByDepth: ({ node: ITrieNode; entries: (readonly [string, ITrieNode])[] } | undefined)[] = [];

    while (depth >= 0) {
        const item = stack[depth];
        if (!item.pos) {
            // First visit of this node: hand it to the consumer, who may ask to skip its children.
            const stop = yield item;
            if (stop) {
                depth--;
                continue;
            }
        }
        ++item.pos;
        if (!item.node.hasChildren()) {
            depth--;
            continue;
        }
        let children = childrenByDepth[depth];
        if (!children || children.node !== item.node) {
            children = { node: item.node, entries: [...item.node.entries()] };
            childrenByDepth[depth] = children;
        }
        const entries = children.entries;
        if (item.pos > entries.length) {
            // All children have been visited.
            depth--;
            continue;
        }
        // `pos` is 1-based at this point: pos N means "descend into child N - 1".
        const [key, node] = entries[item.pos - 1];

        ++depth;
        stack[depth] = { node, key, depth, pos: 0, word: item.word + key };
    }
}

/**
 * Render every word in the trie as a line of the form `<path> > <word>`.
 *
 * The path lists the keys used to reach the word's node (optionally with node ids); the part of
 * the path shared with the previous line is replaced by `-` characters. Paths are right-padded
 * to a common width.
 *
 * @param trie - the trie to render.
 * @param withId - when true, each path step includes the formatted node id.
 * @returns one line per end-of-word node, in walk order.
 */
export function iTrieToStructuredStringLines(trie: ITrie, withId: boolean = true): string[] {
    const lines = [..._iTrieToPathAndWord(trie, withId)];
    const pathMax = lines.reduce((max, [path]) => Math.max(max, path.length), 0);
    return lines.map(([path, word]) => `${path.padEnd(pathMax)} > ${word}`);
}

/**
 * Walk the trie and yield a `[path, word]` pair for every node flagged as end-of-word.
 * `path` is already diffed against the previously yielded path.
 */
function* _iTrieToPathAndWord(trie: ITrie, withId: boolean): Iterable<[string, string]> {
    const stack: WalkITriedNodeStackItem[] = [];
    let lastStep = '';
    const mapItem = withId ? mapWithId : mapWithoutId;
    for (const item of walkITrieNodes(trie, stack)) {
        if (item.node.eow) {
            // Frames 1..depth are the steps from the root to this node (frame 0 is the root itself).
            const steps =
                stack
                    .slice(1, item.depth + 1)
                    .map(mapItem)
                    .join('') + '⏎';

            const path = diffStrings(lastStep, steps, '-');

            yield [path, item.word];
            lastStep = steps;
        }
    }

    function mapWithId(item: WalkITriedNodeStackItem): string {
        const id = formatNodeId(item.node.id);
        return `${item.key || '.'},(${id})`;
    }

    function mapWithoutId(item: WalkITriedNodeStackItem): string {
        return (item.key || '.') + ',';
    }
}

/**
 * Index of the first position at which `a` and `b` differ, or the length of the shorter array
 * when one is a prefix of the other.
 */
function findFirstDiff(a: string[], b: string[]): number {
    let i = 0;
    for (; i < a.length && i < b.length; ++i) {
        if (a[i] !== b[i]) return i;
    }
    return i;
}

/**
 * Return `b` with the leading part it shares with `a` replaced by `replace`, once per shared
 * code point. Strings are compared by code point (via spreading), not by UTF-16 code unit.
 */
function diffStrings(a: string, b: string, replace: string): string {
    const aa = [...a];
    const bb = [...b];
    const idx = findFirstDiff(aa, bb);
    return replace.repeat(idx) + bb.slice(idx).join('');
}

/**
 * Format a node id as `<upper>.<lower>` in hex.
 *
 * The id is rendered as at least 16 hex digits; `upper` is the first 8 digits with leading zeros
 * removed and then padded to at least 4 digits, `lower` is the remaining digits with leading zeros
 * removed (`0` when nothing is left). `lower` is therefore never empty and the dot is always present.
 */
function formatNodeId(id: ITrieNodeId): string {
    const s = id.toString(16).padStart(16, '0');
    const upper = s.slice(0, 8).replace(/^0+/, '').padStart(4, '0');
    const lower = s.slice(8).replace(/^0+/, '').padStart(1, '0');
    return `${upper}.${lower}`;
}
createTrieBlobFromTrieRoot(dictionaryTrieRoot()); } const sampleWords = [ diff --git a/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts b/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts index 7409c4df6803..96535ba5e83d 100644 --- a/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts +++ b/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts @@ -2,6 +2,7 @@ import type { Operator } from '@cspell/cspell-pipe/sync'; import { opCombine as opPipe, opConcatMap, opFilter, opMap } from '@cspell/cspell-pipe/sync'; import { buildITrieFromWords } from './buildITrie.ts'; +import type { BuildOptions } from './BuildOptions.ts'; import { CASE_INSENSITIVE_PREFIX, COMPOUND_FIX, @@ -19,7 +20,7 @@ import type { Trie } from './trie.ts'; import { buildTrieFast } from './TrieBuilder.ts'; import { normalizeWord, normalizeWordForCaseInsensitive } from './utils/normalizeWord.ts'; -export interface ParseDictionaryOptions { +export interface ParseDictionaryOptions extends BuildOptions { compoundCharacter: string; optionalCompoundCharacter: string; forbiddenPrefix: string; @@ -96,6 +97,18 @@ export interface ParseDictionaryOptions { * @default false */ makeWordsForbidden?: boolean; + + /** + * Optimize the trie for size by merging duplicate sub-tries and using a String Table. + * @default false + */ + optimize?: boolean; + + /** + * Use a string table to reduce memory usage. 
+ * @default false + */ + useStringTable?: boolean; } const RegExpSplit = /[\s,;]/g; @@ -436,7 +449,8 @@ export function parseLinesToDictionary(lines: Iterable, options?: Partia const _options = mergeOptions(_defaultOptions, options); const dictLines = parseDictionaryLines(lines, _options); const words = [...new Set(dictLines)].sort(); - return buildITrieFromWords(words, trieInfoFromOptions(options)); + const { optimize, useStringTable } = options || {}; + return buildITrieFromWords(words, trieInfoFromOptions(options), { optimize, useStringTable }); } export function parseDictionary(text: string | Iterable, options?: Partial): ITrie { diff --git a/packages/cspell-trie-lib/src/lib/StringTable/StringTable.test.ts b/packages/cspell-trie-lib/src/lib/StringTable/StringTable.test.ts index 7a786c385946..9fa92430607a 100644 --- a/packages/cspell-trie-lib/src/lib/StringTable/StringTable.test.ts +++ b/packages/cspell-trie-lib/src/lib/StringTable/StringTable.test.ts @@ -59,6 +59,12 @@ describe('StringTableBuilder', () => { const retrieved = indices.map((i) => table.getString(i)); expect(retrieved).toEqual(segments); + + expect(table.bitInfo()).toEqual({ + minIndexBits: 11, + offsetBits: 7, + strLenBits: 4, + }); }); test('encode and decode StringTable', () => { @@ -73,6 +79,9 @@ describe('StringTableBuilder', () => { const decodedTable = decodeStringTableFromBinary(encoded, 'LE'); + expect(decodedTable.length).toBe(table.length); + expect(decodedTable.dataByteLength()).toBe(table.dataByteLength()); + const retrieved = indices.map((i) => decodedTable.getString(i)); expect(retrieved).toEqual(segments); }); diff --git a/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts b/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts index 70206923b2b1..73a67e77c482 100644 --- a/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts +++ b/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts @@ -78,6 +78,17 @@ export class StringTable { return 
this.#data.subarray(offset, offset + length); } + dataByteLength(): number { + return this.#data.byteLength; + } + + bitInfo(): { strLenBits: number; offsetBits: number; minIndexBits: number } { + const strLenBits = this.strLenBits; + const offsetBits = Math.ceil(Math.log2(this.charData.length + 1)); + const minIndexBits = strLenBits + offsetBits; + return { strLenBits, offsetBits, minIndexBits }; + } + values(): U8Array[] { return [...this.#index].map((v) => this.#getBytesByIndexValue(v)); } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.en.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.en.test.ts index 92ff7dab45c2..291aed1046b7 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.en.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.en.test.ts @@ -4,6 +4,7 @@ import { opSkip, opTake, pipe } from '@cspell/cspell-pipe/sync'; import { describe, expect, test } from 'vitest'; import { readTrieFromConfig } from '../../test/dictionaries.test.helper.ts'; +import { createTrieBlobFromTrie } from './createTrieBlob.ts'; import { TrieBlobBuilder } from './TrieBlobBuilder.ts'; function getTrie() { @@ -17,7 +18,7 @@ describe('Validate English FastTrieBlob', async () => { const pTrie = getTrie(); const sampleTrie = await pTrie; const sampleWordsLarge = [...pipe(sampleTrie.words(), opSkip(1000), opTake(6000))]; - const fastTrieBlob = TrieBlobBuilder.fromTrieRoot(sampleTrie.root); + const trieBlob = createTrieBlobFromTrie(sampleTrie); test('insert', () => { const words = sampleWordsLarge; @@ -29,7 +30,7 @@ describe('Validate English FastTrieBlob', async () => { test('has', () => { const words = sampleWordsLarge; for (const word of words) { - expect(fastTrieBlob.has(word)).toBe(true); + expect(trieBlob.has(word)).toBe(true); } }); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts index b085915d7bb4..7c0a73f3aef9 100644 --- 
a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts @@ -4,7 +4,7 @@ import { readTrieBlobFromConfig } from '../../test/dictionaries.test.helper.ts'; import { hexDump } from '../binary/index.ts'; import { validateTrie } from '../TrieNode/trie-util.ts'; import { buildTrieNodeTrieFromWords } from '../TrieNode/TrieNodeBuilder.ts'; -import { createTrieBlob } from './createTrieBlob.ts'; +import { createTrieBlob, createTrieBlobFromTrieRoot } from './createTrieBlob.ts'; import { TrieBlob } from './TrieBlob.ts'; import { TrieBlobBuilder } from './TrieBlobBuilder.ts'; import { NodeChildIndexRefShift, NodeHeaderNumChildrenMask } from './TrieBlobFormat.ts'; @@ -54,7 +54,8 @@ describe('TrieBlob', () => { expect([...trie.words()]).toEqual(sampleWords); expect(sampleWords.some((w) => !trie.has(w))).toBe(false); expect(validateTrie(trie.root).isValid).toBe(true); - const tb = TrieBlobBuilder.fromTrieRoot(trie.root); + const tb = createTrieBlobFromTrieRoot(trie.root); + // console.error('%o', JSON.parse(JSON.stringify(ft))); expect([...tb.words()]).toEqual(sampleWords); expect(sampleWords.some((w) => !tb.has(w))).toBe(false); @@ -104,7 +105,7 @@ describe('TrieBlob ITrie support methods', () => { test('getChildrenFromRef optimized', () => { const words = getWordsForDictionary(); const firstChars = [...new Set(words.map((w) => [...w][0]))].sort(); - const t = TrieBlobBuilder.fromWordList(words, undefined, true); + const t = TrieBlobBuilder.fromWordList(words, undefined, { useStringTable: true, optimize: true }); const rootRef = t.rootRef; @@ -157,7 +158,7 @@ describe('TrieBlob encode/decode', async () => { test('encode optimize hexDump', () => { const words = ['apple', 'banana', 'grape', 'orange', 'strawberry']; - const tb = TrieBlobBuilder.fromWordList(words, undefined, true); + const tb = TrieBlobBuilder.fromWordList(words, undefined, { useStringTable: true, optimize: true }); const bin = tb.encodeToBTrie(); 
const r = TrieBlob.decodeBin(bin); expect([...r.words()]).toEqual(words); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.test.ts index 8cc7d02c737a..476fc89e91e5 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.test.ts @@ -1,12 +1,19 @@ import { describe, expect, test } from 'vitest'; +import { registerDebugMode } from '../../test/debugger.ts'; import type { BuilderCursor } from '../Builder/index.ts'; import { insertWordsAtCursor } from '../Builder/index.ts'; +import { buildITrieFromWords } from '../buildITrie.ts'; import { consolidate } from '../consolidate.ts'; import { defaultTrieInfo } from '../constants.ts'; +import type { ITrie } from '../ITrie.ts'; +import { createITrieFromTrieData, iTrieToStructuredStringLines } from '../ITrie.ts'; import { extractTrieCharacteristics } from '../ITrieNode/TrieInfo.ts'; +import { parseDictionaryLines } from '../SimpleDictionaryParser.ts'; +import { trieRootToITrieRoot } from '../TrieNode/trie.ts'; import { createTrieRoot, insert } from '../TrieNode/trie-util.ts'; import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.ts'; +import { createTrieBlobFromTrieRoot } from './createTrieBlob.ts'; import { TrieBlobBuilder } from './TrieBlobBuilder.ts'; describe('FastTrieBlobBuilder', () => { @@ -56,7 +63,8 @@ describe('FastTrieBlobBuilder', () => { const words = [...new Set(sampleWords())].sort(); const builder = new TrieBlobBuilder(); builder.insert(words); - expect(builder.has('😀😃😄😁😆🥹😅😂🤣🥲☺️😊😇🙂🙃😉')).toBe(true); + expect(builder.has('😀😃😄😁😆🥹😅😂')).toBe(true); + expect(builder.has('🤣🥲☺️😊😇🙂🙃😉')).toBe(true); const ft = builder.build(); expect([...ft.words()]).toEqual([...words].sort()); }); @@ -150,23 +158,47 @@ describe('FastTrieBlobBuilder', () => { test('fromTrieRoot non-optimized trie', () => { const words = sampleWords(); - const t = 
TrieBlobBuilder.fromTrieRoot(buildTrie(words, false)); + const trie = buildTrie(words, false); + const iTrieRoot = trieRootToITrieRoot(trie); + const t = TrieBlobBuilder.fromTrieRoot(trie); const sortedUnique = [...new Set(words)].sort(); expect([...t.words()].sort()).toEqual(sortedUnique); + + const t2 = TrieBlobBuilder.fromITrieRoot(iTrieRoot); + expect([...t2.words()].sort()).toEqual(sortedUnique); + + const t3 = TrieBlobBuilder.fromITrieRoot(t.getRoot()); + expect([...t3.words()].sort()).toEqual(sortedUnique); }); test('fromTrieRoot(optimize) non-optimized trie', () => { const words = sampleWords(); - const t = TrieBlobBuilder.fromTrieRoot(buildTrie(words, false), true); + const trie = buildTrie(words, false); + const iTrieRoot = trieRootToITrieRoot(trie); + const t = createTrieBlobFromTrieRoot(trie, { useStringTable: true, optimize: true }); const sortedUnique = [...new Set(words)].sort(); expect([...t.words()]).toEqual(sortedUnique); + + const t2 = TrieBlobBuilder.fromITrieRoot(iTrieRoot); + expect([...t2.words()].sort()).toEqual(sortedUnique); + + const t3 = TrieBlobBuilder.fromITrieRoot(t.getRoot()); + expect([...t3.words()].sort()).toEqual(sortedUnique); }); test('fromTrieRoot optimized trie', () => { const words = sampleWords(); - const t = TrieBlobBuilder.fromTrieRoot(buildTrie(words, true)); + const trie = buildTrie(words); + const iTrieRoot = trieRootToITrieRoot(trie); + const t = createTrieBlobFromTrieRoot(trie, { useStringTable: true, optimize: true }); const sortedUnique = [...new Set(words)].sort(); expect([...t.words()].sort()).toEqual(sortedUnique); + + const t2 = TrieBlobBuilder.fromITrieRoot(iTrieRoot, { useStringTable: true, optimize: true }); + expect([...t2.words()].sort()).toEqual(sortedUnique); + + const t3 = TrieBlobBuilder.fromITrieRoot(t.getRoot(), { useStringTable: true, optimize: true }); + expect([...t3.words()].sort()).toEqual(sortedUnique); }); test('should be able to correctly preserve referenced nodes.', () => { @@ -184,17 
+216,90 @@ describe('FastTrieBlobBuilder', () => { }); describe('optimization', () => { + // Register the debug mode to make sure suggestions do not timeout during debugging. + const isDebugMode = registerDebugMode(); + const timeout = isDebugMode ? 1_000_000 : undefined; + test.each` comment | words ${'single word'} | ${['optimization']} ${'multiple words'} | ${['optimization', 'optimize']} ${'multiple words shared endings'} | ${['optimization', 'vacation', 'sensation']} + ${'emojis'} | ${['😀😃😄😁', '😆🥹😅😂', '🤣🥲☺️😊', '😇🙂🙃😉']} ${'sampleWords()'} | ${sampleWords()} `('optimize $comment $words', ({ words }) => { const sortedUnique = [...new Set(words)].sort(); - const ft = TrieBlobBuilder.fromWordList(words, undefined, true); - expect([...ft.words()]).toEqual(sortedUnique); + const ft0 = TrieBlobBuilder.fromWordList(words, undefined, { useStringTable: false, optimize: false }); + expect([...ft0.words()]).toEqual(sortedUnique); + const ft1 = TrieBlobBuilder.fromWordList(words, undefined, { useStringTable: false, optimize: true }); + expect([...ft1.words()]).toEqual(sortedUnique); + const ft2 = TrieBlobBuilder.fromWordList(words, undefined, { useStringTable: true, optimize: true }); + expect([...ft2.words()]).toEqual(sortedUnique); + + const t0 = createITrieFromTrieData(ft0); + const t1 = createITrieFromTrieData(ft1); + const t2 = createITrieFromTrieData(ft2); + + expect(iTrieToStructuredStringLines(t0).join('\n')).toMatchSnapshot('No optimization'); + expect(iTrieToStructuredStringLines(t1).join('\n')).toMatchSnapshot('With optimization'); + expect(iTrieToStructuredStringLines(t2).join('\n')).toMatchSnapshot('With optimization and string table'); }); + + test( + 'impact on suggest (accents)', + () => { + // setDebuggerAttached(true); // turn on debug output in suggestAStar + + // cspell:ignore Geschäft Aujourd'hui + const words = [ + ...parseDictionaryLines(['Geschäft'.normalize('NFD'), 'café', 'book', "Aujourd'hui"], { + stripCaseAndAccents: true, + }), + ]; + const tb 
= buildITrieFromWords(words); + expect([...tb.words()]).toEqual([ + "Aujourd'hui", + 'Geschäft', + 'book', + 'café', + "~aujourd'hui", + '~cafe', + '~geschaft', + '~geschäft', + ]); + + // console.log('%s', iTrieToStructuredStringLines(tb).join('\n')); + + expect(tb.has(words[0])).toBe(true); + expect(iTrieHas(tb, words[0], true)).toBe(true); + expect(iTrieHas(tb, words[0], false)).toBe(true); + expect(iTrieHas(tb, words[0].toLowerCase(), false)).toBe(true); + words.forEach((w) => expect(tb.hasWord(w, true)).toBe(true)); + words.map((w) => w.toLowerCase()).forEach((w) => expect(iTrieHas(tb, w, false)).toBe(true)); + expect(tb.hasWord(words[0].toLowerCase(), false)).toBe(true); + expect(tb.hasWord(words[0].toLowerCase(), true)).toBe(false); + // expect(d.suggest('geschaft', { ignoreCase: false }).map((r) => r)).toEqual(['Geschäft']); + expect(tb.suggestWithCost('geschaft', { ignoreCase: true })).toEqual([ + { + cost: 0, + isPreferred: undefined, + word: 'geschaft', + }, + { + cost: 1, + isPreferred: undefined, + word: 'geschäft', + }, + { + cost: 2, + isPreferred: undefined, + word: 'Geschäft', + }, + ]); + expect(tb.suggest('geschaft', { ignoreCase: true })).toEqual(['geschaft', 'geschäft', 'Geschäft']); + }, + timeout, + ); }); function sampleWords() { @@ -217,12 +322,18 @@ function sampleWords() { "ᐃᓄᒃᑎᑐᑦ", "ᐊᓂᔑᓈᐯᒧᐎᓐ", "ᓀᐦᐃᔭᐍᐏᐣ" - 😀😃😄😁😆🥹😅😂🤣🥲☺️😊😇🙂🙃😉 - 😌😍🥰😘😗😙😚😋😛😝😜🤪🤨🧐🤓😎 - 🥸🤩🥳😏😒😞😔😟😕🙁☹️😣😖😫😩🥺 - 😢😭😤😠😡🤬🤯😳🥵🥶😶‍🌫️😱😨😰😥😓 - 🤗🤔🫣🤭🫢🫡🤫🫠🤥😶🫥😐🫤😑🫨😬 - 🙄😯😦😧😮😲🥱😴🤤😪😮‍💨😵😵‍💫🤐🥴🤢 + 😀😃😄😁😆🥹😅😂 + 🤣🥲☺️😊😇🙂🙃😉 + 😌😍🥰😘😗😙😚😋 + 😛😝😜🤪🤨🧐🤓😎 + 🥸🤩🥳😏😒😞😔😟 + 😕🙁☹️😣😖😫😩🥺 + 😢😭😤😠😡🤬🤯😳 + 🥵🥶😶‍🌫️😱😨😰😥😓 + 🤗🤔🫣🤭🫢🫡🤫🫠 + 🤥😶🫥😐🫤😑🫨😬 + 🙄😯😦😧😮😲🥱😴 + 🤤😪😮‍💨😵😵‍💫🤐🥴🤢 🤮🤧😷🤒🤕🤑🤠😈 // cspell:enable @@ -337,3 +448,14 @@ function genWords(len: number, startLetter: string, endLetter: string): string[] // function toJsonObj(s: { toJSON: () => any }) { // return JSON.parse(JSON.stringify(s)); // } + +function iTrieHas(trie: ITrie, word: string, caseSensitive: boolean): boolean { + let r = trie.hasWord(word, caseSensitive); + r ||= 
trie.hasWord(word.toLowerCase(), caseSensitive); + + if (!caseSensitive) { + r ||= trie.hasWord('~' + word, caseSensitive); + } + + return r; +} diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.ts index 61eaa0e0a114..6fad4ad4a751 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobBuilder.ts @@ -1,4 +1,6 @@ import type { BuilderCursor, TrieBuilder } from '../Builder/index.ts'; +import type { BuildOptions } from '../BuildOptions.ts'; +import type { ITrieNode, ITrieNodeId, ITrieNodeRoot } from '../ITrieNode/index.ts'; import type { PartialTrieInfo, TrieCharacteristics, TrieInfo } from '../ITrieNode/TrieInfo.ts'; import { normalizeTrieInfo, TrieInfoBuilder } from '../ITrieNode/TrieInfo.ts'; import { StringTableBuilder } from '../StringTable/StringTable.ts'; @@ -6,7 +8,7 @@ import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.ts'; import { assert } from '../utils/assert.ts'; import { assertValidUtf16Character } from '../utils/text.ts'; import { CharIndexBuilder } from './CharIndex.ts'; -import { optimizeNodesWithStringTable } from './optimizeNodes.ts'; +import { optimizeNodes, optimizeNodesWithStringTable } from './optimizeNodes.ts'; import { resolveMap } from './resolveMap.ts'; import type { TrieBlob } from './TrieBlob.ts'; import { NodeChildIndexRefShift, NodeHeaderEOWMask, NodeMaskCharByte } from './TrieBlobFormat.ts'; @@ -15,6 +17,8 @@ import { encodeTextToUtf8_32Rev, encodeToUtf8_32Rev } from './Utf8.ts'; type FastTrieBlobNode = number[]; +const AutoOptimizeNodeCount = 1000; + export class TrieBlobBuilder implements TrieBuilder { private charIndex = new CharIndexBuilder(); private nodes: FastTrieBlobNode[]; @@ -325,22 +329,40 @@ export class TrieBlobBuilder implements TrieBuilder { return this; } - build(optimize: boolean = false): TrieBlob { + build(buildOptions?: BuildOptions): TrieBlob { + 
const { optimize, useStringTable } = buildOptions || {}; this._cursor?.dispose?.(); this._readonly = true; this.freeze(); const info = this.#infoBuilder.build(); - const sortedNodes = sortNodes( + let sortedNodes = sortNodes( this.nodes.map((n) => Uint32Array.from(n)), NodeMaskCharByte, ); + // When `optimize` is not set, optimize automatically if the node count is below AutoOptimizeNodeCount. + // This will not involve a string table; that step is controlled separately by `useStringTable`. + if (optimize ?? sortedNodes.length < AutoOptimizeNodeCount) { + sortedNodes = optimizeNodes(sortedNodes); + } + const stringTable = new StringTableBuilder().build(); - const r = optimize + const r = useStringTable ? optimizeNodesWithStringTable({ nodes: sortedNodes, stringTable }) : { nodes: sortedNodes, stringTable }; + // console.log('TrieBlobBuilder.build: %o', { + // optimize, + // useStringTable, + // size: r.nodes.reduce((sum, n) => sum + n.length, 0) * 4, + // numNodes: r.nodes.length, + // stringNumEntries: r.stringTable.length, + // strLenBits: r.stringTable.strLenBits, + // stringTableByteSize: r.stringTable.dataByteLength(), + // stringTableBitInfo: r.stringTable.bitInfo(), + // }); + return toTrieBlob(r.nodes, r.stringTable, normalizeTrieInfo(info.info)); } @@ -361,13 +383,27 @@ export class TrieBlobBuilder implements TrieBuilder { static fromWordList( words: readonly string[] | Iterable, options?: PartialTrieInfo, - optimize?: boolean, + buildOptions?: BuildOptions, ): TrieBlob { const ft = new TrieBlobBuilder(options); - return ft.insert(words).build(optimize); + return ft.insert(words).build(buildOptions); } - static fromTrieRoot(root: TrieRoot, optimize?: boolean): TrieBlob { + /** + * Create a TrieBlob from a TrieRoot.
* + * This is equivalent to, but slightly faster because it avoids creating an ITrieNodes + * ```ts + * static fromTrieRoot(root: TrieRoot, optimize?: boolean): TrieBlob { + * return this.fromITrieRoot(trieRootToITrieRoot(root), optimize); + * } + * ``` + * + * @param root - TrieRoot + * @param buildOptions - optional build options + * @returns TrieBlob + */ + static fromTrieRoot(root: TrieRoot, buildOptions?: BuildOptions): TrieBlob { const NodeCharIndexMask = NodeMaskCharByte; const nodeChildRefShift = NodeChildIndexRefShift; const NodeMaskEOW = NodeHeaderEOWMask; @@ -427,7 +463,76 @@ export class TrieBlobBuilder implements TrieBuilder { walk(root); - return tf.build(optimize); + return tf.build(buildOptions); + } + + /** + * Create a TrieBlob from an ITrieNodeRoot by walking its nodes. + * + * @param root - root node of the ITrie to convert + * @param buildOptions - optional build options, passed through to `build` + * @returns TrieBlob + */ + static fromITrieRoot(root: ITrieNodeRoot, buildOptions?: BuildOptions): TrieBlob { + const NodeCharIndexMask = NodeMaskCharByte; + const nodeChildRefShift = NodeChildIndexRefShift; + const NodeMaskEOW = NodeHeaderEOWMask; + + const tf = new TrieBlobBuilder(undefined, root); + const IdxEOW = tf.IdxEOW; + + const known = new Map([[root.id, 0]]); + + function resolveNode(n: ITrieNode): number { + if (n.eow && !n.hasChildren()) return IdxEOW; + const node = [n.eow ?
NodeMaskEOW : 0]; + return tf.nodes.push(node) - 1; + } + + function walk(n: ITrieNode): number { + const found = known.get(n.id); + if (found) return found; + const nodeIdx = resolveMap(known, n.id, () => resolveNode(n)); + const node = tf.nodes[nodeIdx]; + if (!n.hasChildren()) return nodeIdx; + const children = n.entries(); + for (const [char, childNode] of children) { + addCharToNode(node, char, childNode); + } + return nodeIdx; + } + + function resolveChild(node: FastTrieBlobNode, charIndex: number): number { + let i = 1; + for (i = 1; i < node.length && (node[i] & NodeCharIndexMask) !== charIndex; ++i) { + // empty + } + return i; + } + + function addCharToNode(node: FastTrieBlobNode, char: string, n: ITrieNode): void { + const indexSeq = tf.letterToUtf8Seq(char); + assertValidUtf16Character(char); + // console.error('addCharToNode %o', { char, indexSeq }); + for (const idx of indexSeq.slice(0, -1)) { + const pos = resolveChild(node, idx); + if (pos < node.length) { + node = tf.nodes[node[pos] >>> nodeChildRefShift]; + } else { + const next: FastTrieBlobNode = [0]; + const nodeIdx = tf.nodes.push(next) - 1; + node[pos] = (nodeIdx << nodeChildRefShift) | idx; + node = next; + } + } + const letterIdx = indexSeq[indexSeq.length - 1]; + const i = node.push(letterIdx) - 1; + node[i] = (walk(n) << nodeChildRefShift) | letterIdx; + } + + walk(root); + + return tf.build(buildOptions); } } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts index c34fd571d948..5f6d909f1233 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts @@ -154,4 +154,8 @@ export class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot { get caseInsensitivePrefix(): string { return this.info.stripCaseAndAccentsPrefix; } + + get suggestionPrefix(): string { + return this.info.suggestionPrefix; + } } diff --git 
a/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlob.test.ts.snap b/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlob.test.ts.snap index eadd3323f6b0..166a81cc07fb 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlob.test.ts.snap +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlob.test.ts.snap @@ -2,48 +2,47 @@ exports[`TrieBlob encode/decode > encode hexDump 1`] = ` "00000000 54 72 69 65 42 6c 6f 62 01 02 03 04 30 30 2e 30 TrieBlob....00.0 -00000010 31 2e 30 30 84 00 00 00 f4 00 00 00 00 00 00 00 1.00............ +00000010 31 2e 30 30 84 00 00 00 e4 00 00 00 00 00 00 00 1.00............ 00000020 00 00 00 00 21 21 7e 7e 2b 2b 3a 3a 00 00 00 00 ....!!~~++::.... 00000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000060 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ -00000080 00 00 00 00 05 00 00 00 61 07 00 00 62 0f 00 00 ........a...b... -00000090 67 19 00 00 6f 21 00 00 73 2b 00 00 00 01 00 00 g...o!..s+...... -000000a0 01 00 00 00 70 09 00 00 01 00 00 00 70 0b 00 00 ....p.......p... -000000b0 01 00 00 00 6c 0d 00 00 01 00 00 00 65 06 00 00 ....l.......e... -000000c0 01 00 00 00 61 11 00 00 01 00 00 00 6e 13 00 00 ....a.......n... -000000d0 01 00 00 00 61 15 00 00 01 00 00 00 6e 17 00 00 ....a.......n... -000000e0 01 00 00 00 61 06 00 00 01 00 00 00 72 1b 00 00 ....a.......r... -000000f0 01 00 00 00 61 1d 00 00 01 00 00 00 70 1f 00 00 ....a.......p... -00000100 01 00 00 00 65 06 00 00 01 00 00 00 72 23 00 00 ....e.......r#.. -00000110 01 00 00 00 61 25 00 00 01 00 00 00 6e 27 00 00 ....a%......n'.. -00000120 01 00 00 00 67 29 00 00 01 00 00 00 65 06 00 00 ....g)......e... 
-00000130 01 00 00 00 74 2d 00 00 01 00 00 00 72 2f 00 00 ....t-......r/.. -00000140 01 00 00 00 61 31 00 00 01 00 00 00 77 33 00 00 ....a1......w3.. -00000150 01 00 00 00 62 35 00 00 01 00 00 00 65 37 00 00 ....b5......e7.. -00000160 01 00 00 00 72 39 00 00 01 00 00 00 72 3b 00 00 ....r9......r;.. -00000170 01 00 00 00 79 06 00 00 ....y..." +00000080 00 00 00 00 05 00 00 00 61 0d 00 00 62 17 00 00 ........a...b... +00000090 67 1d 00 00 6f 25 00 00 73 37 00 00 00 01 00 00 g...o%..s7...... +000000a0 01 00 00 00 65 06 00 00 01 00 00 00 6c 07 00 00 ....e.......l... +000000b0 01 00 00 00 70 09 00 00 01 00 00 00 70 0b 00 00 ....p.......p... +000000c0 01 00 00 00 61 06 00 00 01 00 00 00 6e 0f 00 00 ....a.......n... +000000d0 01 00 00 00 61 11 00 00 01 00 00 00 6e 13 00 00 ....a.......n... +000000e0 01 00 00 00 61 15 00 00 01 00 00 00 70 07 00 00 ....a.......p... +000000f0 01 00 00 00 61 19 00 00 01 00 00 00 72 1b 00 00 ....a.......r... +00000100 01 00 00 00 67 07 00 00 01 00 00 00 6e 1f 00 00 ....g.......n... +00000110 01 00 00 00 61 21 00 00 01 00 00 00 72 23 00 00 ....a!......r#.. +00000120 01 00 00 00 79 06 00 00 01 00 00 00 72 27 00 00 ....y.......r'.. +00000130 01 00 00 00 72 29 00 00 01 00 00 00 65 2b 00 00 ....r)......e+.. +00000140 01 00 00 00 62 2d 00 00 01 00 00 00 77 2f 00 00 ....b-......w/.. +00000150 01 00 00 00 61 31 00 00 01 00 00 00 72 33 00 00 ....a1......r3.. +00000160 01 00 00 00 74 35 00 00 ....t5.." `; exports[`TrieBlob encode/decode > encode optimize hexDump 1`] = ` "00000000 54 72 69 65 42 6c 6f 62 01 02 03 04 30 30 2e 30 TrieBlob....00.0 -00000010 31 2e 30 30 84 00 00 00 30 00 00 00 00 00 00 00 1.00....0....... +00000010 31 2e 30 30 84 00 00 00 54 00 00 00 00 00 00 00 1.00....T....... 00000020 00 00 00 00 21 21 7e 7e 2b 2b 3a 3a 00 00 00 00 ....!!~~++::.... -00000030 00 00 00 00 00 00 00 00 00 00 00 00 b4 00 00 00 ................ -00000040 67 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 g............... 
+00000030 00 00 00 00 00 00 00 00 00 00 00 00 d8 00 00 00 ................ +00000040 46 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 F............... 00000050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000060 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ -00000080 00 00 00 00 05 00 00 00 61 07 00 00 62 08 00 00 ........a...b... -00000090 67 09 00 00 6f 0a 00 00 73 0b 00 00 00 01 00 00 g...o...s....... -000000a0 00 09 00 00 00 13 00 00 00 19 00 00 00 21 00 00 .............!.. -000000b0 00 33 00 00 10 04 53 54 00 00 00 00 18 00 00 00 .3....ST........ -000000c0 34 00 00 00 4c 00 00 00 1b 00 00 00 00 00 51 00 4...L.........Q. -000000d0 52 01 43 01 34 01 21 00 a2 00 93 00 a4 00 95 00 R.C.4.!......... -000000e0 92 01 83 01 74 01 12 01 03 01 f4 00 e5 00 81 00 ....t........... -000000f0 72 00 63 00 54 00 45 00 36 00 27 00 18 00 09 00 r.c.T.E.6.'..... -00000100 74 72 61 77 62 65 72 72 79 61 6e 61 6e 61 72 61 trawberryananara -00000110 6e 67 65 70 70 6c 65 72 61 70 65 ngepplerape" +00000080 00 00 00 00 05 00 00 00 61 09 00 00 62 0b 00 00 ........a...b... +00000090 67 0d 00 00 6f 0f 00 00 73 13 00 00 00 01 00 00 g...o...s....... +000000a0 01 00 00 00 65 06 00 00 01 04 00 00 6c 07 00 00 ....e.......l... +000000b0 01 0c 00 00 61 06 00 00 01 10 00 00 70 07 00 00 ....a.......p... +000000c0 01 12 00 00 67 07 00 00 01 1a 00 00 79 06 00 00 ....g.......y... +000000d0 01 1c 00 00 77 11 00 00 10 03 53 54 00 00 00 00 ....w.....ST.... +000000e0 18 00 00 00 1e 00 00 00 36 00 00 00 10 00 00 00 ........6....... +000000f0 00 00 71 00 72 00 09 00 02 00 0b 00 04 00 01 00 ..q.r........... 
+00000100 42 00 43 00 31 00 32 00 2b 00 24 00 5b 00 61 6e B.C.1.2.+.$.[.an +00000110 61 6e 62 65 72 72 72 61 6e 74 72 61 70 70 anberrrantrapp" `; diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlobBuilder.test.ts.snap b/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlobBuilder.test.ts.snap new file mode 100644 index 000000000000..4acb937426c1 --- /dev/null +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/__snapshots__/TrieBlobBuilder.test.ts.snap @@ -0,0 +1,373 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`optimization > optimize 'emojis' [ '😀😃😄😁', '😆🥹😅😂', '🤣🥲☺️😊', '😇🙂🙃😉' ] > No optimization 1`] = ` +"😀,(000c.0)😃,(0014.0)😄,(001c.0)😁,(0024.0)⏎ > 😀😃😄😁 +😆,(0025.0)🥹,(002d.0)😅,(0035.0)😂,(003d.0)⏎ > 😆🥹😅😂 +😇,(005d.0)🙂,(0065.0)🙃,(006d.0)😉,(0075.0)⏎ > 😇🙂🙃😉 +🤣,(0040.0)🥲,(0048.0)☺,(004e.0)️,(0054.0)😊,(005c.0)⏎ > 🤣🥲☺️😊" +`; + +exports[`optimization > optimize 'emojis' [ '😀😃😄😁', '😆🥹😅😂', '🤣🥲☺️😊', '😇🙂🙃😉' ] > With optimization 1`] = ` +"😀,(0019.0)😃,(0011.0)😄,(0009.0)😁,(0002.0)⏎ > 😀😃😄😁 +😆,(0031.0)🥹,(0029.0)😅,(0021.0)😂,(0002.0)⏎ > 😆🥹😅😂 +😇,(0049.0)🙂,(0041.0)🙃,(0039.0)😉,(0002.0)⏎ > 😇🙂🙃😉 +🤣,(0069.0)🥲,(0061.0)☺,(005b.0)️,(0055.0)😊,(0002.0)⏎ > 🤣🥲☺️😊" +`; + +exports[`optimization > optimize 'emojis' [ '😀😃😄😁', '😆🥹😅😂', '🤣🥲☺️😊', '😇🙂🙃😉' ] > With optimization and string table 1`] = ` +"😀,(0008.0)😃,(0006.2)😄,(0004.1)😁,(0003.0)⏎ > 😀😃😄😁 +😆,(000e.0)🥹,(000c.2)😅,(000a.1)😂,(0003.0)⏎ > 😆🥹😅😂 +😇,(0014.0)🙂,(0012.2)🙃,(0010.1)😉,(0003.0)⏎ > 😇🙂🙃😉 +🤣,(001e.1)🥲,(001c.0)☺,(001c.3)️,(001a.1)😊,(0003.0)⏎ > 🤣🥲☺️😊" +`; + +exports[`optimization > optimize 'multiple words shared endings' [ 'optimization', 'vacation', 'sensation' ] > No optimization 1`] = ` +"o,(0005.0)p,(0007.0)t,(0009.0)i,(000b.0)m,(000d.0)i,(000f.0)z,(0011.0)a,(0013.0)t,(0015.0)i,(0017.0)o,(0019.0)n,(0004.0)⏎ > optimization +s,(0029.0)e,(002b.0)n,(002d.0)s,(002f.0)a,(0031.0)t,(0033.0)i,(0035.0)o,(0037.0)n,(0004.0)⏎ > sensation 
+v,(001b.0)a,(001d.0)c,(001f.0)a,(0021.0)t,(0023.0)i,(0025.0)o,(0027.0)n,(0004.0)⏎ > vacation" +`; + +exports[`optimization > optimize 'multiple words shared endings' [ 'optimization', 'vacation', 'sensation' ] > With optimization 1`] = ` +"o,(0019.0)p,(0017.0)t,(0015.0)i,(0013.0)m,(0011.0)i,(000f.0)z,(000d.0)a,(000b.0)t,(0009.0)i,(0007.0)o,(0005.0)n,(0004.0)⏎ > optimization +s,(001f.0)e,(001d.0)n,(001b.0)s,(000d.0)a,(000b.0)t,(0009.0)i,(0007.0)o,(0005.0)n,(0004.0)⏎ > sensation +v,(0023.0)a,(0021.0)c,(000d.0)a,(000b.0)t,(0009.0)i,(0007.0)o,(0005.0)n,(0004.0)⏎ > vacation" +`; + +exports[`optimization > optimize 'multiple words shared endings' [ 'optimization', 'vacation', 'sensation' ] > With optimization and string table 1`] = ` +"o,(0008.0)p,(0006.0)t,(0006.1)i,(0006.2)m,(0006.3)i,(0006.4)z,(0005.0)a,(0005.1)t,(0005.2)i,(0005.3)o,(0005.4)n,(0005.5)⏎ > optimization +s,(000a.0)e,(000a.1)n,(000a.2)s,(0005.0)a,(0005.1)t,(0005.2)i,(0005.3)o,(0005.4)n,(0005.5)⏎ > sensation +v,(000c.0)a,(000c.1)c,(0005.0)a,(0005.1)t,(0005.2)i,(0005.3)o,(0005.4)n,(0005.5)⏎ > vacation" +`; + +exports[`optimization > optimize 'multiple words' [ 'optimization', 'optimize' ] > No optimization 1`] = ` +"o,(0003.0)p,(0005.0)t,(0007.0)i,(0009.0)m,(000b.0)i,(000d.0)z,(000f.0)a,(0012.0)t,(0014.0)i,(0016.0)o,(0018.0)n,(0002.0)⏎ > optimization +----------------------------------------------------------------------e,(0002.0)⏎ > optimize" +`; + +exports[`optimization > optimize 'multiple words' [ 'optimization', 'optimize' ] > With optimization 1`] = ` +"o,(0018.0)p,(0016.0)t,(0014.0)i,(0012.0)m,(0010.0)i,(000e.0)z,(000b.0)a,(0009.0)t,(0007.0)i,(0005.0)o,(0003.0)n,(0002.0)⏎ > optimization +----------------------------------------------------------------------e,(0002.0)⏎ > optimize" +`; + +exports[`optimization > optimize 'multiple words' [ 'optimization', 'optimize' ] > With optimization and string table 1`] = ` 
+"o,(0000.1)p,(0000.2)t,(0005.0)i,(0005.1)m,(0005.2)i,(0005.3)z,(0005.4)a,(0003.0)t,(0003.1)i,(0003.2)o,(0003.3)n,(0002.0)⏎ > optimization +----------------------------------------------------------------------e,(0002.0)⏎ > optimize" +`; + +exports[`optimization > optimize 'sampleWords()' [ 'Here', 'are', 'a', 'few', 'words', 'to', 'use', 'as', 'a', …(97) ] > No optimization 1`] = ` +"A,(0509.0)B,(050b.0)C,(050d.0)D,(050f.0)E,(0511.0)F,(0513.0)G,(0515.0)H,(0022.0)⏎ > ABCDEFGH +H,(0023.0)e,(0025.0)r,(0027.0)e,(0022.0)⏎ > Here +I,(0517.0)J,(0519.0)K,(051b.0)L,(051d.0)M,(051f.0)N,(0521.0)O,(0523.0)P,(0022.0)⏎ > IJKLMNOP +Q,(0525.0)R,(0527.0)S,(0529.0)T,(052b.0)U,(052d.0)V,(052f.0)W,(0531.0)X,(0022.0)⏎ > QRSTUVWX +T,(005a.0)h,(005c.0)e,(005e.0)y,(0022.0)⏎ > They +Y,(0533.0)Z,(0022.0)⏎ > YZ +^,(0022.0)⏎ > ^ +\`,(0022.0)⏎ > \` +a,(0029.0)⏎ > a +----------b,(0535.0)c,(0537.0)d,(0539.0)e,(053b.0)f,(053d.0)g,(053f.0)h,(0022.0)⏎ > abcdefgh +----------n,(00d6.0)d,(0022.0)⏎ > and +----------r,(002e.0)e,(0022.0)⏎ > are +----------s,(0022.0)⏎ > as +b,(006c.0)e,(0022.0)⏎ > be +----------l,(00af.0)u,(00b1.0)e,(0022.0)⏎ > blue +c,(00d8.0)s,(00da.0)p,(00dc.0)e,(00de.0)l,(00e0.0)l,(0022.0)⏎ > cspell +d,(0046.0)a,(00d1.0)r,(00d4.0)k,(0022.0)⏎ > dark +--------------------y,(0022.0)⏎ > day +----------i,(0049.0)c,(004c.0)t,(004e.0)i,(0050.0)o,(0052.0)n,(0054.0)a,(0056.0)r,(0058.0)y,(0022.0)⏎ > dictionary +--------------------s,(00e2.0)a,(00e4.0)b,(00e6.0)l,(00e8.0)e,(0022.0)⏎ > disable +e,(04ff.0)n,(0501.0)a,(0503.0)b,(0505.0)l,(0507.0)e,(0022.0)⏎ > enable +f,(0030.0)e,(0033.0)w,(0022.0)⏎ > few +----------i,(00cb.0)r,(00cd.0)s,(00cf.0)t,(0022.0)⏎ > first +g,(00a7.0)r,(00a9.0)e,(00ab.0)e,(00ad.0)n,(0022.0)⏎ > green +i,(0541.0)j,(0543.0)k,(0545.0)l,(0547.0)m,(0549.0)n,(054b.0)o,(054d.0)p,(0022.0)⏎ > ijklmnop +j,(0060.0)u,(0062.0)s,(0064.0)t,(0022.0)⏎ > just +n,(0066.0)e,(0068.0)e,(006a.0)d,(0022.0)⏎ > need +o,(00bd.0)f,(0022.0)⏎ > of +----------n,(0022.0)⏎ > on 
+----------r,(00c1.0)a,(00c3.0)n,(00c5.0)g,(00c7.0)e,(0022.0)⏎ > orange +p,(0093.0)l,(0095.0)a,(0097.0)y,(0099.0)⏎ > play +----------------------------------------e,(009c.0)d,(0022.0)⏎ > played +--------------------------------------------------r,(0022.0)⏎ > player +----------------------------------------i,(009f.0)n,(00a1.0)g,(0022.0)⏎ > playing +q,(054f.0)r,(0551.0)s,(0553.0)t,(0555.0)u,(0557.0)v,(0559.0)w,(055b.0)x,(055d.0)ἀ,(0022.0)⏎ > qrstuvwxἀ +r,(00a3.0)e,(00a5.0)d,(0022.0)⏎ > red +s,(006f.0)p,(0071.0)l,(0073.0)i,(0075.0)t,(0022.0)⏎ > split +t,(003e.0)a,(0085.0)l,(0087.0)k,(0089.0)⏎ > talk +----------------------------------------e,(008c.0)d,(0022.0)⏎ > talked +--------------------------------------------------r,(0022.0)⏎ > talker +----------------------------------------i,(008f.0)n,(0091.0)g,(0022.0)⏎ > talking +----------h,(00c9.0)e,(0022.0)⏎ > the +----------o,(0022.0)⏎ > to +u,(0042.0)s,(0044.0)e,(0022.0)⏎ > use +w,(0035.0)a,(0077.0)l,(0079.0)k,(007b.0)⏎ > walk +----------------------------------------e,(007e.0)d,(0022.0)⏎ > walked +--------------------------------------------------r,(0022.0)⏎ > walker +----------------------------------------i,(0081.0)n,(0083.0)g,(0022.0)⏎ > walking +----------o,(0038.0)r,(003a.0)d,(003c.0)s,(0022.0)⏎ > words +y,(00b3.0)e,(00b5.0)l,(00b7.0)l,(00b9.0)o,(00bb.0)w,(0022.0)⏎ > yellow +Έ,(0994.0)Ὴ,(099a.0)Ή,(099e.0)ῌ,(09a4.0)῍,(09aa.0)῎,(09b0.0)῏,(09b6.0)ῐ,(0022.0)⏎ > ΈῊΉῌ῍῎῏ῐ +ά,(07c2.0)ὲ,(07c8.0)έ,(07cc.0)ὴ,(07d2.0)ή,(07d6.0)ὶ,(07dc.0)ί,(07e0.0)ὸ,(0022.0)⏎ > άὲέὴήὶίὸ +ό,(07e8.0)ὺ,(07ee.0)ύ,(07f2.0)ὼ,(07f8.0)ώ,(07fc.0)὾,(0802.0)὿,(0808.0)ᾀ,(0022.0)⏎ > όὺύὼώ὾὿ᾀ +ພ,(011f.0)າ,(0125.0)ສ,(012b.0)າ,(0131.0)ລ,(0137.0)າ,(013d.0)ວ,(0022.0)⏎ > ພາສາລາວ +ት,(00f5.0)ግ,(00fb.0)ር,(0101.0)ኛ,(0022.0)⏎ > ትግርኛ +አ,(0109.0)ማ,(010f.0)ር,(0115.0)ኛ,(0022.0)⏎ > አማርኛ +ᐃ,(015c.0)ᓄ,(0162.0)ᒃ,(0168.0)ᑎ,(016e.0)ᑐ,(0174.0)ᑦ,(0022.0)⏎ > ᐃᓄᒃᑎᑐᑦ +ᐊ,(017a.0)ᓂ,(0180.0)ᔑ,(0186.0)ᓈ,(018c.0)ᐯ,(0192.0)ᒧ,(0198.0)ᐎ,(019e.0)ᓐ,(0022.0)⏎ > ᐊᓂᔑᓈᐯᒧᐎᓐ 
+ᓀ,(01a6.0)ᐦ,(01ac.0)ᐃ,(01b2.0)ᔭ,(01b8.0)ᐍ,(01be.0)ᐏ,(01c4.0)ᐣ,(0022.0)⏎ > ᓀᐦᐃᔭᐍᐏᐣ +ἁ,(056c.0)ἂ,(0572.0)ἃ,(0578.0)ἄ,(057e.0)ἅ,(0584.0)ἆ,(058a.0)ἇ,(0590.0)Ἀ,(0022.0)⏎ > ἁἂἃἄἅἆἇἈ +Ἁ,(0596.0)Ἂ,(059c.0)Ἃ,(05a2.0)Ἄ,(05a8.0)Ἅ,(05ae.0)Ἆ,(05b4.0)Ἇ,(05ba.0)ἐ,(0022.0)⏎ > ἉἊἋἌἍἎἏἐ +ἑ,(05c0.0)ἒ,(05c6.0)ἓ,(05cc.0)ἔ,(05d2.0)ἕ,(05d8.0)἖,(05de.0)἗,(05e4.0)Ἐ,(0022.0)⏎ > ἑἒἓἔἕ἖἗Ἐ +Ἑ,(05ea.0)Ἒ,(05f0.0)Ἓ,(05f6.0)Ἔ,(05fc.0)Ἕ,(0602.0)἞,(0608.0)἟,(060e.0)ἠ,(0022.0)⏎ > ἙἚἛἜἝ἞἟ἠ +ἡ,(0614.0)ἢ,(061a.0)ἣ,(0620.0)ἤ,(0626.0)ἥ,(062c.0)ἦ,(0632.0)ἧ,(0638.0)Ἠ,(0022.0)⏎ > ἡἢἣἤἥἦἧἨ +Ἡ,(063e.0)Ἢ,(0644.0)Ἣ,(064a.0)Ἤ,(0650.0)Ἥ,(0656.0)Ἦ,(065c.0)Ἧ,(0662.0)ἰ,(0022.0)⏎ > ἩἪἫἬἭἮἯἰ +ἱ,(0668.0)ἲ,(066e.0)ἳ,(0674.0)ἴ,(067a.0)ἵ,(0680.0)ἶ,(0686.0)ἷ,(068c.0)Ἰ,(0022.0)⏎ > ἱἲἳἴἵἶἷἸ +Ἱ,(0692.0)Ἲ,(0698.0)Ἳ,(069e.0)Ἴ,(06a4.0)Ἵ,(06aa.0)Ἶ,(06b0.0)Ἷ,(06b6.0)ὀ,(0022.0)⏎ > ἹἺἻἼἽἾἿὀ +ὁ,(06c3.0)ὂ,(06c9.0)ὃ,(06cf.0)ὄ,(06d5.0)ὅ,(06db.0)὆,(06e1.0)὇,(06e7.0)Ὀ,(0022.0)⏎ > ὁὂὃὄὅ὆὇Ὀ +Ὁ,(06ed.0)Ὂ,(06f3.0)Ὃ,(06f9.0)Ὄ,(06ff.0)Ὅ,(0705.0)὎,(070b.0)὏,(0711.0)ὐ,(0022.0)⏎ > ὉὊὋὌὍ὎὏ὐ +ὑ,(0717.0)ὒ,(071d.0)ὓ,(0723.0)ὔ,(0729.0)ὕ,(072f.0)ὖ,(0735.0)ὗ,(073b.0)὘,(0022.0)⏎ > ὑὒὓὔὕὖὗ὘ +Ὑ,(0741.0)὚,(0747.0)Ὓ,(074d.0)὜,(0753.0)Ὕ,(0759.0)὞,(075f.0)Ὗ,(0765.0)ὠ,(0022.0)⏎ > Ὑ὚Ὓ὜Ὕ὞Ὗὠ +ὡ,(076b.0)ὢ,(0771.0)ὣ,(0777.0)ὤ,(077d.0)ὥ,(0783.0)ὦ,(0789.0)ὧ,(078f.0)Ὠ,(0022.0)⏎ > ὡὢὣὤὥὦὧὨ +Ὡ,(0795.0)Ὢ,(079b.0)Ὣ,(07a1.0)Ὤ,(07a7.0)Ὥ,(07ad.0)Ὦ,(07b3.0)Ὧ,(07b9.0)ὰ,(0022.0)⏎ > ὩὪὫὬὭὮὯὰ +ᾁ,(0817.0)ᾂ,(081d.0)ᾃ,(0823.0)ᾄ,(0829.0)ᾅ,(082f.0)ᾆ,(0835.0)ᾇ,(083b.0)ᾈ,(0022.0)⏎ > ᾁᾂᾃᾄᾅᾆᾇᾈ +ᾉ,(0841.0)ᾊ,(0847.0)ᾋ,(084d.0)ᾌ,(0853.0)ᾍ,(0859.0)ᾎ,(085f.0)ᾏ,(0865.0)ᾐ,(0022.0)⏎ > ᾉᾊᾋᾌᾍᾎᾏᾐ +ᾑ,(086b.0)ᾒ,(0871.0)ᾓ,(0877.0)ᾔ,(087d.0)ᾕ,(0883.0)ᾖ,(0889.0)ᾗ,(088f.0)ᾘ,(0022.0)⏎ > ᾑᾒᾓᾔᾕᾖᾗᾘ +ᾙ,(0895.0)ᾚ,(089b.0)ᾛ,(08a1.0)ᾜ,(08a7.0)ᾝ,(08ad.0)ᾞ,(08b3.0)ᾟ,(08b9.0)ᾠ,(0022.0)⏎ > ᾙᾚᾛᾜᾝᾞᾟᾠ +ᾡ,(08bf.0)ᾢ,(08c5.0)ᾣ,(08cb.0)ᾤ,(08d1.0)ᾥ,(08d7.0)ᾦ,(08dd.0)ᾧ,(08e3.0)ᾨ,(0022.0)⏎ > ᾡᾢᾣᾤᾥᾦᾧᾨ +ᾩ,(08e9.0)ᾪ,(08ef.0)ᾫ,(08f5.0)ᾬ,(08fb.0)ᾭ,(0901.0)ᾮ,(0907.0)ᾯ,(090d.0)ᾰ,(0022.0)⏎ > ᾩᾪᾫᾬᾭᾮᾯᾰ 
+ᾱ,(0913.0)ᾲ,(0919.0)ᾳ,(091f.0)ᾴ,(0925.0)᾵,(092b.0)ᾶ,(0931.0)ᾷ,(0937.0)Ᾰ,(0022.0)⏎ > ᾱᾲᾳᾴ᾵ᾶᾷᾸ +Ᾱ,(093d.0)Ὰ,(0943.0)Ά,(0947.0)ᾼ,(094d.0)᾽,(0953.0)ι,(0957.0)᾿,(095d.0)῀,(0022.0)⏎ > ᾹᾺΆᾼ᾽ι᾿῀ +῁,(096a.0)ῂ,(0970.0)ῃ,(0976.0)ῄ,(097c.0)῅,(0982.0)ῆ,(0988.0)ῇ,(098e.0)Ὲ,(0022.0)⏎ > ῁ῂῃῄ῅ῆῇῈ +ῑ,(09bc.0)ῒ,(09c2.0)ΐ,(09c6.0)῔,(09cc.0)῕,(09d2.0)ῖ,(09d8.0)ῗ,(09de.0)Ῐ,(0022.0)⏎ > ῑῒΐ῔῕ῖῗῘ +Ῑ,(09e4.0)Ὶ,(09ea.0)Ί,(09ee.0)῜,(09f4.0)῝,(09fa.0)῞,(0a00.0)῟,(0a06.0)ῠ,(0022.0)⏎ > ῙῚΊ῜῝῞῟ῠ +ῡ,(0a0c.0)ῢ,(0a12.0)ΰ,(0a16.0)ῤ,(0a1c.0)ῥ,(0a22.0)ῦ,(0a28.0)ῧ,(0a2e.0)Ῠ,(0022.0)⏎ > ῡῢΰῤῥῦῧῨ +Ῡ,(0a34.0)Ὺ,(0a3a.0)Ύ,(0a3e.0)Ῥ,(0a44.0)῭,(0a4a.0)΅,(0a4e.0)\`,(0a50.0)῰,(0022.0)⏎ > ῩῪΎῬ῭΅\`῰ +῱,(0a56.0)ῲ,(0a5c.0)ῳ,(0a62.0)ῴ,(0a68.0)῵,(0a6e.0)ῶ,(0a74.0)ῷ,(0a7a.0)Ὸ,(0022.0)⏎ > ῱ῲῳῴ῵ῶῷῸ +ꦧ,(0147.0)ꦱ,(014d.0)ꦗ,(0153.0)ꦮ,(0022.0)⏎ > ꦧꦱꦗꦮ +😀,(01d7.0)😃,(01df.0)😄,(01e7.0)😁,(01ef.0)😆,(01f7.0)🥹,(01ff.0)😅,(0207.0)😂,(020f.0)⏎ > 😀😃😄😁😆🥹😅😂 +😌,(0253.0)😍,(025b.0)🥰,(0263.0)😘,(026b.0)😗,(0273.0)😙,(027b.0)😚,(0283.0)😋,(028b.0)⏎ > 😌😍🥰😘😗😙😚😋 +😕,(0301.0)🙁,(0309.0)☹,(030f.0)️,(0315.0)😣,(031d.0)😖,(0325.0)😫,(032d.0)😩,(0335.0)🥺,(033d.0)⏎ > 😕🙁☹️😣😖😫😩🥺 +😛,(028c.0)😝,(0294.0)😜,(029c.0)🤪,(02a4.0)🤨,(02ac.0)🧐,(02b4.0)🤓,(02bc.0)😎,(02c4.0)⏎ > 😛😝😜🤪🤨🧐🤓😎 +😢,(033e.0)😭,(0346.0)😤,(034e.0)😠,(0356.0)😡,(035e.0)🤬,(0366.0)🤯,(036e.0)😳,(0376.0)⏎ > 😢😭😤😠😡🤬🤯😳 +🙄,(0438.0)😯,(0440.0)😦,(0448.0)😧,(0450.0)😮,(0458.0)😲,(0460.0)🥱,(0468.0)😴,(0470.0)⏎ > 🙄😯😦😧😮😲🥱😴 +🤗,(03c4.0)🤔,(03cc.0)🫣,(03d4.0)🤭,(03dc.0)🫢,(03e4.0)🫡,(03ec.0)🤫,(03f4.0)🫠,(03fc.0)⏎ > 🤗🤔🫣🤭🫢🫡🤫🫠 +🤣,(0216.0)🥲,(021e.0)☺,(0224.0)️,(022a.0)😊,(0232.0)😇,(023a.0)🙂,(0242.0)🙃,(024a.0)😉,(0252.0)⏎ > 🤣🥲☺️😊😇🙂🙃😉 +🤤,(0471.0)😪,(0479.0)😮,(0481.0)‍,(0487.0)💨,(048f.0)😵,(0497.0)😵,(049f.0)‍,(04a5.0)💫,(04ad.0)🤐,(04b5.0)🥴,(04bd.0)🤢,(04c5.0)⏎ > 🤤😪😮‍💨😵😵‍💫🤐🥴🤢 +🤥,(03fd.0)😶,(0405.0)🫥,(040d.0)😐,(0415.0)🫤,(041d.0)😑,(0425.0)🫨,(042d.0)😬,(0435.0)⏎ > 🤥😶🫥😐🫤😑🫨😬 +🤮,(04c6.0)🤧,(04ce.0)😷,(04d6.0)🤒,(04de.0)🤕,(04e6.0)🤑,(04ee.0)🤠,(04f6.0)😈,(04fe.0)⏎ > 🤮🤧😷🤒🤕🤑🤠😈 
+🥵,(0377.0)🥶,(037f.0)😶,(0387.0)‍,(038d.0)🌫,(0395.0)️,(039b.0)😱,(03a3.0)😨,(03ab.0)😰,(03b3.0)😥,(03bb.0)😓,(03c3.0)⏎ > 🥵🥶😶‍🌫️😱😨😰😥😓 +🥸,(02c8.0)🤩,(02d0.0)🥳,(02d8.0)😏,(02e0.0)😒,(02e8.0)😞,(02f0.0)😔,(02f8.0)😟,(0300.0)⏎ > 🥸🤩🥳😏😒😞😔😟" +`; + +exports[`optimization > optimize 'sampleWords()' [ 'Here', 'are', 'a', 'few', 'words', 'to', 'use', 'as', 'a', …(97) ] > With optimization 1`] = ` +"A,(002f.0)B,(002d.0)C,(002b.0)D,(0029.0)E,(0027.0)F,(0025.0)G,(0023.0)H,(0022.0)⏎ > ABCDEFGH +H,(0035.0)e,(0033.0)r,(0031.0)e,(0022.0)⏎ > Here +I,(0043.0)J,(0041.0)K,(003f.0)L,(003d.0)M,(003b.0)N,(0039.0)O,(0037.0)P,(0022.0)⏎ > IJKLMNOP +Q,(0051.0)R,(004f.0)S,(004d.0)T,(004b.0)U,(0049.0)V,(0047.0)W,(0045.0)X,(0022.0)⏎ > QRSTUVWX +T,(0057.0)h,(0055.0)e,(0053.0)y,(0022.0)⏎ > They +Y,(0059.0)Z,(0022.0)⏎ > YZ +^,(0022.0)⏎ > ^ +\`,(0022.0)⏎ > \` +a,(0069.0)⏎ > a +----------b,(0065.0)c,(0063.0)d,(0061.0)e,(005f.0)f,(005d.0)g,(005b.0)h,(0022.0)⏎ > abcdefgh +----------n,(0067.0)d,(0022.0)⏎ > and +----------r,(0031.0)e,(0022.0)⏎ > are +----------s,(0022.0)⏎ > as +b,(0070.0)e,(0022.0)⏎ > be +----------l,(006e.0)u,(0031.0)e,(0022.0)⏎ > blue +c,(007b.0)s,(0079.0)p,(0077.0)e,(0075.0)l,(0073.0)l,(0022.0)⏎ > cspell +d,(0097.0)a,(007f.0)r,(007d.0)k,(0022.0)⏎ > dark +--------------------y,(0022.0)⏎ > day +----------i,(0094.0)c,(008c.0)t,(008a.0)i,(0088.0)o,(0086.0)n,(0084.0)a,(0082.0)r,(0053.0)y,(0022.0)⏎ > dictionary +--------------------s,(0092.0)a,(0090.0)b,(008e.0)l,(0031.0)e,(0022.0)⏎ > disable +e,(009a.0)n,(0092.0)a,(0090.0)b,(008e.0)l,(0031.0)e,(0022.0)⏎ > enable +f,(00a4.0)e,(009c.0)w,(0022.0)⏎ > few +----------i,(00a2.0)r,(00a0.0)s,(009e.0)t,(0022.0)⏎ > first +g,(00ad.0)r,(00ab.0)e,(00a9.0)e,(00a7.0)n,(0022.0)⏎ > green +i,(00bb.0)j,(00b9.0)k,(00b7.0)l,(00b5.0)m,(00b3.0)n,(00b1.0)o,(00af.0)p,(0022.0)⏎ > ijklmnop +j,(00bd.0)u,(00a0.0)s,(009e.0)t,(0022.0)⏎ > just +n,(00c1.0)e,(00bf.0)e,(0067.0)d,(0022.0)⏎ > need +o,(00c9.0)f,(0022.0)⏎ > of +----------n,(0022.0)⏎ > on 
+----------r,(00c7.0)a,(00c5.0)n,(00c3.0)g,(0031.0)e,(0022.0)⏎ > orange +p,(00db.0)l,(00d9.0)a,(00d7.0)y,(00d4.0)⏎ > play +----------------------------------------e,(00cd.0)d,(0022.0)⏎ > played +--------------------------------------------------r,(0022.0)⏎ > player +----------------------------------------i,(00d2.0)n,(00d0.0)g,(0022.0)⏎ > playing +q,(00ef.0)r,(00ed.0)s,(00eb.0)t,(00e9.0)u,(00e7.0)v,(00e5.0)w,(00e3.0)x,(00e1.0)ἀ,(0022.0)⏎ > qrstuvwxἀ +r,(00bf.0)e,(0067.0)d,(0022.0)⏎ > red +s,(00f5.0)p,(00f3.0)l,(00f1.0)i,(009e.0)t,(0022.0)⏎ > split +t,(00fb.0)a,(00f9.0)l,(00f7.0)k,(00d4.0)⏎ > talk +----------------------------------------e,(00cd.0)d,(0022.0)⏎ > talked +--------------------------------------------------r,(0022.0)⏎ > talker +----------------------------------------i,(00d2.0)n,(00d0.0)g,(0022.0)⏎ > talking +----------h,(0031.0)e,(0022.0)⏎ > the +----------o,(0022.0)⏎ > to +u,(00ff.0)s,(0031.0)e,(0022.0)⏎ > use +w,(0107.0)a,(00f9.0)l,(00f7.0)k,(00d4.0)⏎ > walk +----------------------------------------e,(00cd.0)d,(0022.0)⏎ > walked +--------------------------------------------------r,(0022.0)⏎ > walker +----------------------------------------i,(00d2.0)n,(00d0.0)g,(0022.0)⏎ > walking +----------o,(0105.0)r,(0103.0)d,(0101.0)s,(0022.0)⏎ > words +y,(0110.0)e,(010e.0)l,(010c.0)l,(010a.0)o,(009c.0)w,(0022.0)⏎ > yellow +Έ,(0138.0)Ὴ,(0132.0)Ή,(012e.0)ῌ,(0128.0)῍,(0122.0)῎,(011c.0)῏,(0116.0)ῐ,(0022.0)⏎ > ΈῊΉῌ῍῎῏ῐ +ά,(015c.0)ὲ,(0156.0)έ,(0152.0)ὴ,(014c.0)ή,(0148.0)ὶ,(0142.0)ί,(013e.0)ὸ,(0022.0)⏎ > άὲέὴήὶίὸ +ό,(0183.0)ὺ,(017d.0)ύ,(0179.0)ὼ,(0173.0)ώ,(016f.0)὾,(0169.0)὿,(0163.0)ᾀ,(0022.0)⏎ > όὺύὼώ὾὿ᾀ +ພ,(01a9.0)າ,(01a3.0)ສ,(019d.0)າ,(0197.0)ລ,(0191.0)າ,(018b.0)ວ,(0022.0)⏎ > ພາສາລາວ +ት,(01bf.0)ግ,(01b9.0)ር,(01b3.0)ኛ,(0022.0)⏎ > ትግርኛ +አ,(01c7.0)ማ,(01b9.0)ር,(01b3.0)ኛ,(0022.0)⏎ > አማርኛ +ᐃ,(01e7.0)ᓄ,(01e1.0)ᒃ,(01db.0)ᑎ,(01d5.0)ᑐ,(01cf.0)ᑦ,(0022.0)⏎ > ᐃᓄᒃᑎᑐᑦ +ᐊ,(020f.0)ᓂ,(0209.0)ᔑ,(0203.0)ᓈ,(01fd.0)ᐯ,(01f7.0)ᒧ,(01f1.0)ᐎ,(01eb.0)ᓐ,(0022.0)⏎ > ᐊᓂᔑᓈᐯᒧᐎᓐ 
+ᓀ,(0236.0)ᐦ,(0230.0)ᐃ,(022a.0)ᔭ,(0224.0)ᐍ,(021e.0)ᐏ,(0218.0)ᐣ,(0022.0)⏎ > ᓀᐦᐃᔭᐍᐏᐣ +ἁ,(0262.0)ἂ,(025c.0)ἃ,(0256.0)ἄ,(0250.0)ἅ,(024a.0)ἆ,(0244.0)ἇ,(023e.0)Ἀ,(0022.0)⏎ > ἁἂἃἄἅἆἇἈ +Ἁ,(028a.0)Ἂ,(0284.0)Ἃ,(027e.0)Ἄ,(0278.0)Ἅ,(0272.0)Ἆ,(026c.0)Ἇ,(0266.0)ἐ,(0022.0)⏎ > ἉἊἋἌἍἎἏἐ +ἑ,(02b4.0)ἒ,(02ae.0)ἓ,(02a8.0)ἔ,(02a2.0)ἕ,(029c.0)἖,(0296.0)἗,(0290.0)Ἐ,(0022.0)⏎ > ἑἒἓἔἕ἖἗Ἐ +Ἑ,(02de.0)Ἒ,(02d8.0)Ἓ,(02d2.0)Ἔ,(02cc.0)Ἕ,(02c6.0)἞,(02c0.0)἟,(02ba.0)ἠ,(0022.0)⏎ > ἙἚἛἜἝ἞἟ἠ +ἡ,(0308.0)ἢ,(0302.0)ἣ,(02fc.0)ἤ,(02f6.0)ἥ,(02f0.0)ἦ,(02ea.0)ἧ,(02e4.0)Ἠ,(0022.0)⏎ > ἡἢἣἤἥἦἧἨ +Ἡ,(0332.0)Ἢ,(032c.0)Ἣ,(0326.0)Ἤ,(0320.0)Ἥ,(031a.0)Ἦ,(0314.0)Ἧ,(030e.0)ἰ,(0022.0)⏎ > ἩἪἫἬἭἮἯἰ +ἱ,(035a.0)ἲ,(0354.0)ἳ,(034e.0)ἴ,(0348.0)ἵ,(0342.0)ἶ,(033c.0)ἷ,(0336.0)Ἰ,(0022.0)⏎ > ἱἲἳἴἵἶἷἸ +Ἱ,(0382.0)Ἲ,(037c.0)Ἳ,(0376.0)Ἴ,(0370.0)Ἵ,(036a.0)Ἶ,(0364.0)Ἷ,(035e.0)ὀ,(0022.0)⏎ > ἹἺἻἼἽἾἿὀ +ὁ,(03b3.0)ὂ,(03ad.0)ὃ,(03a7.0)ὄ,(03a1.0)ὅ,(039b.0)὆,(0395.0)὇,(038f.0)Ὀ,(0022.0)⏎ > ὁὂὃὄὅ὆὇Ὀ +Ὁ,(03db.0)Ὂ,(03d5.0)Ὃ,(03cf.0)Ὄ,(03c9.0)Ὅ,(03c3.0)὎,(03bd.0)὏,(03b7.0)ὐ,(0022.0)⏎ > ὉὊὋὌὍ὎὏ὐ +ὑ,(0403.0)ὒ,(03fd.0)ὓ,(03f7.0)ὔ,(03f1.0)ὕ,(03eb.0)ὖ,(03e5.0)ὗ,(03df.0)὘,(0022.0)⏎ > ὑὒὓὔὕὖὗ὘ +Ὑ,(042b.0)὚,(0425.0)Ὓ,(041f.0)὜,(0419.0)Ὕ,(0413.0)὞,(040d.0)Ὗ,(0407.0)ὠ,(0022.0)⏎ > Ὑ὚Ὓ὜Ὕ὞Ὗὠ +ὡ,(0453.0)ὢ,(044d.0)ὣ,(0447.0)ὤ,(0441.0)ὥ,(043b.0)ὦ,(0435.0)ὧ,(042f.0)Ὠ,(0022.0)⏎ > ὡὢὣὤὥὦὧὨ +Ὡ,(047b.0)Ὢ,(0475.0)Ὣ,(046f.0)Ὤ,(0469.0)Ὥ,(0463.0)Ὦ,(045d.0)Ὧ,(0457.0)ὰ,(0022.0)⏎ > ὩὪὫὬὭὮὯὰ +ᾁ,(04aa.0)ᾂ,(04a4.0)ᾃ,(049e.0)ᾄ,(0498.0)ᾅ,(0492.0)ᾆ,(048c.0)ᾇ,(0486.0)ᾈ,(0022.0)⏎ > ᾁᾂᾃᾄᾅᾆᾇᾈ +ᾉ,(04d2.0)ᾊ,(04cc.0)ᾋ,(04c6.0)ᾌ,(04c0.0)ᾍ,(04ba.0)ᾎ,(04b4.0)ᾏ,(04ae.0)ᾐ,(0022.0)⏎ > ᾉᾊᾋᾌᾍᾎᾏᾐ +ᾑ,(04fa.0)ᾒ,(04f4.0)ᾓ,(04ee.0)ᾔ,(04e8.0)ᾕ,(04e2.0)ᾖ,(04dc.0)ᾗ,(04d6.0)ᾘ,(0022.0)⏎ > ᾑᾒᾓᾔᾕᾖᾗᾘ +ᾙ,(0522.0)ᾚ,(051c.0)ᾛ,(0516.0)ᾜ,(0510.0)ᾝ,(050a.0)ᾞ,(0504.0)ᾟ,(04fe.0)ᾠ,(0022.0)⏎ > ᾙᾚᾛᾜᾝᾞᾟᾠ +ᾡ,(054a.0)ᾢ,(0544.0)ᾣ,(053e.0)ᾤ,(0538.0)ᾥ,(0532.0)ᾦ,(052c.0)ᾧ,(0526.0)ᾨ,(0022.0)⏎ > ᾡᾢᾣᾤᾥᾦᾧᾨ +ᾩ,(0572.0)ᾪ,(056c.0)ᾫ,(0566.0)ᾬ,(0560.0)ᾭ,(055a.0)ᾮ,(0554.0)ᾯ,(054e.0)ᾰ,(0022.0)⏎ > ᾩᾪᾫᾬᾭᾮᾯᾰ 
+ᾱ,(059a.0)ᾲ,(0594.0)ᾳ,(058e.0)ᾴ,(0588.0)᾵,(0582.0)ᾶ,(057c.0)ᾷ,(0576.0)Ᾰ,(0022.0)⏎ > ᾱᾲᾳᾴ᾵ᾶᾷᾸ +Ᾱ,(05be.0)Ὰ,(05b8.0)Ά,(05b4.0)ᾼ,(05ae.0)᾽,(05a8.0)ι,(05a4.0)᾿,(059e.0)῀,(0022.0)⏎ > ᾹᾺΆᾼ᾽ι᾿῀ +῁,(05ef.0)ῂ,(05e9.0)ῃ,(05e3.0)ῄ,(05dd.0)῅,(05d7.0)ῆ,(05d1.0)ῇ,(05cb.0)Ὲ,(0022.0)⏎ > ῁ῂῃῄ῅ῆῇῈ +ῑ,(0615.0)ῒ,(060f.0)ΐ,(060b.0)῔,(0605.0)῕,(05ff.0)ῖ,(05f9.0)ῗ,(05f3.0)Ῐ,(0022.0)⏎ > ῑῒΐ῔῕ῖῗῘ +Ῑ,(063b.0)Ὶ,(0635.0)Ί,(0631.0)῜,(062b.0)῝,(0625.0)῞,(061f.0)῟,(0619.0)ῠ,(0022.0)⏎ > ῙῚΊ῜῝῞῟ῠ +ῡ,(0661.0)ῢ,(065b.0)ΰ,(0657.0)ῤ,(0651.0)ῥ,(064b.0)ῦ,(0645.0)ῧ,(063f.0)Ῠ,(0022.0)⏎ > ῡῢΰῤῥῦῧῨ +Ῡ,(0681.0)Ὺ,(067b.0)Ύ,(0677.0)Ῥ,(0671.0)῭,(066b.0)΅,(0667.0)\`,(0665.0)῰,(0022.0)⏎ > ῩῪΎῬ῭΅\`῰ +῱,(06a9.0)ῲ,(06a3.0)ῳ,(069d.0)ῴ,(0697.0)῵,(0691.0)ῶ,(068b.0)ῷ,(0685.0)Ὸ,(0022.0)⏎ > ῱ῲῳῴ῵ῶῷῸ +ꦧ,(06cb.0)ꦱ,(06c5.0)ꦗ,(06bf.0)ꦮ,(0022.0)⏎ > ꦧꦱꦗꦮ +😀,(0707.0)😃,(06ff.0)😄,(06f7.0)😁,(06ef.0)😆,(06e7.0)🥹,(06df.0)😅,(06d7.0)😂,(0022.0)⏎ > 😀😃😄😁😆🥹😅😂 +😌,(073f.0)😍,(0737.0)🥰,(072f.0)😘,(0727.0)😗,(071f.0)😙,(0717.0)😚,(070f.0)😋,(0022.0)⏎ > 😌😍🥰😘😗😙😚😋 +😕,(077b.0)🙁,(0773.0)☹,(076d.0)️,(0767.0)😣,(075f.0)😖,(0757.0)😫,(074f.0)😩,(0747.0)🥺,(0022.0)⏎ > 😕🙁☹️😣😖😫😩🥺 +😛,(07b3.0)😝,(07ab.0)😜,(07a3.0)🤪,(079b.0)🤨,(0793.0)🧐,(078b.0)🤓,(0783.0)😎,(0022.0)⏎ > 😛😝😜🤪🤨🧐🤓😎 +😢,(07eb.0)😭,(07e3.0)😤,(07db.0)😠,(07d3.0)😡,(07cb.0)🤬,(07c3.0)🤯,(07bb.0)😳,(0022.0)⏎ > 😢😭😤😠😡🤬🤯😳 +🙄,(0829.0)😯,(0821.0)😦,(0819.0)😧,(0811.0)😮,(0809.0)😲,(0801.0)🥱,(07f9.0)😴,(0022.0)⏎ > 🙄😯😦😧😮😲🥱😴 +🤗,(0861.0)🤔,(0859.0)🫣,(0851.0)🤭,(0849.0)🫢,(0841.0)🫡,(0839.0)🤫,(0831.0)🫠,(0022.0)⏎ > 🤗🤔🫣🤭🫢🫡🤫🫠 +🤣,(089d.0)🥲,(0895.0)☺,(088f.0)️,(0889.0)😊,(0881.0)😇,(0879.0)🙂,(0871.0)🙃,(0869.0)😉,(0022.0)⏎ > 🤣🥲☺️😊😇🙂🙃😉 +🤤,(08f1.0)😪,(08e9.0)😮,(08e1.0)‍,(08db.0)💨,(08d3.0)😵,(08cb.0)😵,(08c3.0)‍,(08bd.0)💫,(08b5.0)🤐,(08ad.0)🥴,(08a5.0)🤢,(0022.0)⏎ > 🤤😪😮‍💨😵😵‍💫🤐🥴🤢 +🤥,(0929.0)😶,(0921.0)🫥,(0919.0)😐,(0911.0)🫤,(0909.0)😑,(0901.0)🫨,(08f9.0)😬,(0022.0)⏎ > 🤥😶🫥😐🫤😑🫨😬 +🤮,(095f.0)🤧,(0957.0)😷,(094f.0)🤒,(0947.0)🤕,(093f.0)🤑,(0937.0)🤠,(092f.0)😈,(0022.0)⏎ > 🤮🤧😷🤒🤕🤑🤠😈 
+🥵,(09b1.0)🥶,(09a9.0)😶,(09a1.0)‍,(099b.0)🌫,(0993.0)️,(098d.0)😱,(0985.0)😨,(097d.0)😰,(0975.0)😥,(096d.0)😓,(0022.0)⏎ > 🥵🥶😶‍🌫️😱😨😰😥😓 +🥸,(09e9.0)🤩,(09e1.0)🥳,(09d9.0)😏,(09d1.0)😒,(09c9.0)😞,(09c1.0)😔,(09b9.0)😟,(0022.0)⏎ > 🥸🤩🥳😏😒😞😔😟" +`; + +exports[`optimization > optimize 'sampleWords()' [ 'Here', 'are', 'a', 'few', 'words', 'to', 'use', 'as', 'a', …(97) ] > With optimization and string table 1`] = ` +"A,(0025.0)B,(0025.1)C,(0023.0)D,(0023.1)E,(0023.2)F,(0023.3)G,(0023.4)H,(0022.0)⏎ > ABCDEFGH +H,(0029.0)e,(0029.1)r,(0027.0)e,(0022.0)⏎ > Here +I,(002d.0)J,(002d.1)K,(002b.0)L,(002b.1)M,(002b.2)N,(002b.3)O,(002b.4)P,(0022.0)⏎ > IJKLMNOP +Q,(0031.0)R,(0031.1)S,(002f.0)T,(002f.1)U,(002f.2)V,(002f.3)W,(002f.4)X,(0022.0)⏎ > QRSTUVWX +T,(0035.0)h,(0035.1)e,(0033.0)y,(0022.0)⏎ > They +Y,(0037.0)Z,(0022.0)⏎ > YZ +^,(0022.0)⏎ > ^ +\`,(0022.0)⏎ > \` +a,(003f.0)⏎ > a +----------b,(003b.0)c,(0039.0)d,(0039.1)e,(0039.2)f,(0039.3)g,(0039.4)h,(0022.0)⏎ > abcdefgh +----------n,(003d.0)d,(0022.0)⏎ > and +----------r,(0027.0)e,(0022.0)⏎ > are +----------s,(0022.0)⏎ > as +b,(0046.0)e,(0022.0)⏎ > be +----------l,(0044.0)u,(0027.0)e,(0022.0)⏎ > blue +c,(0049.0)s,(0049.1)p,(0049.2)e,(0049.3)l,(0049.4)l,(0022.0)⏎ > cspell +d,(0059.0)a,(004d.0)r,(004b.0)k,(0022.0)⏎ > dark +--------------------y,(0022.0)⏎ > day +----------i,(0056.0)c,(0052.0)t,(0050.0)i,(0050.1)o,(0050.2)n,(0050.3)a,(0050.4)r,(0033.0)y,(0022.0)⏎ > dictionary +--------------------s,(0054.0)a,(0054.1)b,(0054.2)l,(0027.0)e,(0022.0)⏎ > disable +e,(005c.0)n,(0054.0)a,(0054.1)b,(0054.2)l,(0027.0)e,(0022.0)⏎ > enable +f,(0066.0)e,(005e.0)w,(0022.0)⏎ > few +----------i,(0064.0)r,(0062.0)s,(0060.0)t,(0022.0)⏎ > first +g,(0069.0)r,(0069.1)e,(0069.2)e,(0069.3)n,(0022.0)⏎ > green +i,(006d.0)j,(006d.1)k,(006b.0)l,(006b.1)m,(006b.2)n,(006b.3)o,(006b.4)p,(0022.0)⏎ > ijklmnop +j,(006f.0)u,(0062.0)s,(0060.0)t,(0022.0)⏎ > just +n,(0073.0)e,(0071.0)e,(003d.0)d,(0022.0)⏎ > need +o,(0077.0)f,(0022.0)⏎ > of +----------n,(0022.0)⏎ > on 
+----------r,(0075.0)a,(0075.1)n,(0075.2)g,(0027.0)e,(0022.0)⏎ > orange +p,(0083.0)l,(0083.1)a,(0083.2)y,(0080.0)⏎ > play +----------------------------------------e,(007b.0)d,(0022.0)⏎ > played +--------------------------------------------------r,(0022.0)⏎ > player +----------------------------------------i,(007e.0)n,(007e.1)g,(0022.0)⏎ > playing +q,(0089.0)r,(0089.1)s,(0089.2)t,(0089.3)u,(0087.0)v,(0087.1)w,(0087.2)x,(0087.3)ἀ,(0022.0)⏎ > qrstuvwxἀ +r,(0071.0)e,(003d.0)d,(0022.0)⏎ > red +s,(008b.0)p,(008b.1)l,(008b.2)i,(0060.0)t,(0022.0)⏎ > split +t,(008f.0)a,(008d.0)l,(008d.1)k,(0080.0)⏎ > talk +----------------------------------------e,(007b.0)d,(0022.0)⏎ > talked +--------------------------------------------------r,(0022.0)⏎ > talker +----------------------------------------i,(007e.0)n,(007e.1)g,(0022.0)⏎ > talking +----------h,(0027.0)e,(0022.0)⏎ > the +----------o,(0022.0)⏎ > to +u,(0093.0)s,(0027.0)e,(0022.0)⏎ > use +w,(0097.0)a,(008d.0)l,(008d.1)k,(0080.0)⏎ > walk +----------------------------------------e,(007b.0)d,(0022.0)⏎ > walked +--------------------------------------------------r,(0022.0)⏎ > walker +----------------------------------------i,(007e.0)n,(007e.1)g,(0022.0)⏎ > walking +----------o,(0095.0)r,(0095.1)d,(0095.2)s,(0022.0)⏎ > words +y,(009a.0)e,(009a.1)l,(009a.2)l,(009a.3)o,(005e.0)w,(0022.0)⏎ > yellow +Έ,(00a4.0)Ὴ,(00a4.3)Ή,(00a2.1)ῌ,(00a2.4)῍,(00a0.2)῎,(009e.0)῏,(009e.3)ῐ,(0022.0)⏎ > ΈῊΉῌ῍῎῏ῐ +ά,(00ae.0)ὲ,(00ac.1)έ,(00ac.3)ὴ,(00aa.1)ή,(00aa.3)ὶ,(00a8.1)ί,(00a8.3)ὸ,(0022.0)⏎ > άὲέὴήὶίὸ +ό,(00b9.1)ὺ,(00b7.0)ύ,(00b7.2)ὼ,(00b5.0)ώ,(00b5.2)὾,(00b3.0)὿,(00b3.3)ᾀ,(0022.0)⏎ > όὺύὼώ὾὿ᾀ +ພ,(00c1.2)າ,(00bf.0)ສ,(00bf.3)າ,(00bd.1)ລ,(00bd.4)າ,(00bb.2)ວ,(0022.0)⏎ > ພາສາລາວ +ት,(00c5.1)ግ,(00c3.0)ር,(00c3.3)ኛ,(0022.0)⏎ > ትግርኛ +አ,(00c7.1)ማ,(00c3.0)ር,(00c3.3)ኛ,(0022.0)⏎ > አማርኛ +ᐃ,(00cd.0)ᓄ,(00cd.3)ᒃ,(00cb.1)ᑎ,(00cb.4)ᑐ,(00c9.2)ᑦ,(0022.0)⏎ > ᐃᓄᒃᑎᑐᑦ +ᐊ,(00d5.0)ᓂ,(00d5.3)ᔑ,(00d3.1)ᓈ,(00d3.4)ᐯ,(00d1.2)ᒧ,(00cf.0)ᐎ,(00cf.3)ᓐ,(0022.0)⏎ > ᐊᓂᔑᓈᐯᒧᐎᓐ 
+ᓀ,(00e0.1)ᐦ,(00de.0)ᐃ,(00de.3)ᔭ,(00dc.1)ᐍ,(00dc.4)ᐏ,(00da.2)ᐣ,(0022.0)⏎ > ᓀᐦᐃᔭᐍᐏᐣ +ἁ,(00ea.0)ἂ,(00ea.3)ἃ,(00e8.1)ἄ,(00e8.4)ἅ,(00e6.2)ἆ,(00e4.0)ἇ,(00e4.3)Ἀ,(0022.0)⏎ > ἁἂἃἄἅἆἇἈ +Ἁ,(00f2.0)Ἂ,(00f2.3)Ἃ,(00f0.1)Ἄ,(00f0.4)Ἅ,(00ee.2)Ἆ,(00ec.0)Ἇ,(00ec.3)ἐ,(0022.0)⏎ > ἉἊἋἌἍἎἏἐ +ἑ,(00fc.0)ἒ,(00fc.3)ἓ,(00fa.1)ἔ,(00fa.4)ἕ,(00f8.2)἖,(00f6.0)἗,(00f6.3)Ἐ,(0022.0)⏎ > ἑἒἓἔἕ἖἗Ἐ +Ἑ,(0106.0)Ἒ,(0106.3)Ἓ,(0104.1)Ἔ,(0104.4)Ἕ,(0102.2)἞,(0100.0)἟,(0100.3)ἠ,(0022.0)⏎ > ἙἚἛἜἝ἞἟ἠ +ἡ,(0110.0)ἢ,(0110.3)ἣ,(010e.1)ἤ,(010e.4)ἥ,(010c.2)ἦ,(010a.0)ἧ,(010a.3)Ἠ,(0022.0)⏎ > ἡἢἣἤἥἦἧἨ +Ἡ,(011a.0)Ἢ,(011a.3)Ἣ,(0118.1)Ἤ,(0118.4)Ἥ,(0116.2)Ἦ,(0114.0)Ἧ,(0114.3)ἰ,(0022.0)⏎ > ἩἪἫἬἭἮἯἰ +ἱ,(0122.0)ἲ,(0122.3)ἳ,(0120.1)ἴ,(0120.4)ἵ,(011e.2)ἶ,(011c.0)ἷ,(011c.3)Ἰ,(0022.0)⏎ > ἱἲἳἴἵἶἷἸ +Ἱ,(012a.0)Ἲ,(012a.3)Ἳ,(0128.1)Ἴ,(0128.4)Ἵ,(0126.2)Ἶ,(0124.0)Ἷ,(0124.3)ὀ,(0022.0)⏎ > ἹἺἻἼἽἾἿὀ +ὁ,(013b.0)ὂ,(013b.3)ὃ,(0139.1)ὄ,(0139.4)ὅ,(0137.2)὆,(0135.0)὇,(0135.3)Ὀ,(0022.0)⏎ > ὁὂὃὄὅ὆὇Ὀ +Ὁ,(0143.0)Ὂ,(0143.3)Ὃ,(0141.1)Ὄ,(0141.4)Ὅ,(013f.2)὎,(013d.0)὏,(013d.3)ὐ,(0022.0)⏎ > ὉὊὋὌὍ὎὏ὐ +ὑ,(014b.0)ὒ,(014b.3)ὓ,(0149.1)ὔ,(0149.4)ὕ,(0147.2)ὖ,(0145.0)ὗ,(0145.3)὘,(0022.0)⏎ > ὑὒὓὔὕὖὗ὘ +Ὑ,(0153.0)὚,(0153.3)Ὓ,(0151.1)὜,(0151.4)Ὕ,(014f.2)὞,(014d.0)Ὗ,(014d.3)ὠ,(0022.0)⏎ > Ὑ὚Ὓ὜Ὕ὞Ὗὠ +ὡ,(015b.0)ὢ,(015b.3)ὣ,(0159.1)ὤ,(0159.4)ὥ,(0157.2)ὦ,(0155.0)ὧ,(0155.3)Ὠ,(0022.0)⏎ > ὡὢὣὤὥὦὧὨ +Ὡ,(0163.0)Ὢ,(0163.3)Ὣ,(0161.1)Ὤ,(0161.4)Ὥ,(015f.2)Ὦ,(015d.0)Ὧ,(015d.3)ὰ,(0022.0)⏎ > ὩὪὫὬὭὮὯὰ +ᾁ,(0172.0)ᾂ,(0172.3)ᾃ,(0170.1)ᾄ,(0170.4)ᾅ,(016e.2)ᾆ,(016c.0)ᾇ,(016c.3)ᾈ,(0022.0)⏎ > ᾁᾂᾃᾄᾅᾆᾇᾈ +ᾉ,(017a.0)ᾊ,(017a.3)ᾋ,(0178.1)ᾌ,(0178.4)ᾍ,(0176.2)ᾎ,(0174.0)ᾏ,(0174.3)ᾐ,(0022.0)⏎ > ᾉᾊᾋᾌᾍᾎᾏᾐ +ᾑ,(0182.0)ᾒ,(0182.3)ᾓ,(0180.1)ᾔ,(0180.4)ᾕ,(017e.2)ᾖ,(017c.0)ᾗ,(017c.3)ᾘ,(0022.0)⏎ > ᾑᾒᾓᾔᾕᾖᾗᾘ +ᾙ,(018a.0)ᾚ,(018a.3)ᾛ,(0188.1)ᾜ,(0188.4)ᾝ,(0186.2)ᾞ,(0184.0)ᾟ,(0184.3)ᾠ,(0022.0)⏎ > ᾙᾚᾛᾜᾝᾞᾟᾠ +ᾡ,(0192.0)ᾢ,(0192.3)ᾣ,(0190.1)ᾤ,(0190.4)ᾥ,(018e.2)ᾦ,(018c.0)ᾧ,(018c.3)ᾨ,(0022.0)⏎ > ᾡᾢᾣᾤᾥᾦᾧᾨ +ᾩ,(019a.0)ᾪ,(019a.3)ᾫ,(0198.1)ᾬ,(0198.4)ᾭ,(0196.2)ᾮ,(0194.0)ᾯ,(0194.3)ᾰ,(0022.0)⏎ > ᾩᾪᾫᾬᾭᾮᾯᾰ 
+ᾱ,(01a2.0)ᾲ,(01a2.3)ᾳ,(01a0.1)ᾴ,(01a0.4)᾵,(019e.2)ᾶ,(019c.0)ᾷ,(019c.3)Ᾰ,(0022.0)⏎ > ᾱᾲᾳᾴ᾵ᾶᾷᾸ +Ᾱ,(01aa.0)Ὰ,(01a8.0)Ά,(01a8.2)ᾼ,(01a6.0)᾽,(01a6.3)ι,(01a4.0)᾿,(01a4.3)῀,(0022.0)⏎ > ᾹᾺΆᾼ᾽ι᾿῀ +῁,(01bb.0)ῂ,(01bb.3)ῃ,(01b9.1)ῄ,(01b9.4)῅,(01b7.2)ῆ,(01b5.0)ῇ,(01b5.3)Ὲ,(0022.0)⏎ > ῁ῂῃῄ῅ῆῇῈ +ῑ,(01c3.0)ῒ,(01c3.3)ΐ,(01c1.1)῔,(01c1.4)῕,(01bf.2)ῖ,(01bd.0)ῗ,(01bd.3)Ῐ,(0022.0)⏎ > ῑῒΐ῔῕ῖῗῘ +Ῑ,(01cb.0)Ὶ,(01cb.3)Ί,(01c9.1)῜,(01c9.4)῝,(01c7.2)῞,(01c5.0)῟,(01c5.3)ῠ,(0022.0)⏎ > ῙῚΊ῜῝῞῟ῠ +ῡ,(01d3.0)ῢ,(01d3.3)ΰ,(01d1.1)ῤ,(01d1.4)ῥ,(01cf.2)ῦ,(01cd.0)ῧ,(01cd.3)Ῠ,(0022.0)⏎ > ῡῢΰῤῥῦῧῨ +Ῡ,(01db.0)Ὺ,(01d9.2)Ύ,(01d9.4)Ῥ,(01d7.2)῭,(01d5.0)΅,(01d5.2)\`,(01d5.3)῰,(0022.0)⏎ > ῩῪΎῬ῭΅\`῰ +῱,(01e3.0)ῲ,(01e3.3)ῳ,(01e1.1)ῴ,(01e1.4)῵,(01df.2)ῶ,(01dd.0)ῷ,(01dd.3)Ὸ,(0022.0)⏎ > ῱ῲῳῴ῵ῶῷῸ +ꦧ,(01f7.1)ꦱ,(01f7.4)ꦗ,(01f5.2)ꦮ,(0022.0)⏎ > ꦧꦱꦗꦮ +😀,(0205.0)😃,(0203.1)😄,(0201.0)😁,(0201.4)😆,(01ff.3)🥹,(01fd.2)😅,(01fb.1)😂,(0022.0)⏎ > 😀😃😄😁😆🥹😅😂 +😌,(0211.0)😍,(020f.1)🥰,(020d.0)😘,(020d.4)😗,(020b.3)😙,(0209.2)😚,(0207.1)😋,(0022.0)⏎ > 😌😍🥰😘😗😙😚😋 +😕,(021d.0)🙁,(021d.4)☹,(021b.2)️,(0219.0)😣,(0219.4)😖,(0217.3)😫,(0215.2)😩,(0213.1)🥺,(0022.0)⏎ > 😕🙁☹️😣😖😫😩🥺 +😛,(0229.0)😝,(0227.1)😜,(0225.0)🤪,(0225.4)🤨,(0223.3)🧐,(0221.2)🤓,(021f.1)😎,(0022.0)⏎ > 😛😝😜🤪🤨🧐🤓😎 +😢,(0235.0)😭,(0233.1)😤,(0231.0)😠,(0231.4)😡,(022f.3)🤬,(022d.2)🤯,(022b.1)😳,(0022.0)⏎ > 😢😭😤😠😡🤬🤯😳 +🙄,(0247.1)😯,(0245.2)😦,(0243.0)😧,(0243.4)😮,(0241.3)😲,(023f.2)🥱,(023d.1)😴,(0022.0)⏎ > 🙄😯😦😧😮😲🥱😴 +🤗,(0253.0)🤔,(0251.2)🫣,(024f.1)🤭,(024d.0)🫢,(024d.4)🫡,(024b.3)🤫,(0249.2)🫠,(0022.0)⏎ > 🤗🤔🫣🤭🫢🫡🤫🫠 +🤣,(025f.0)🥲,(025d.0)☺,(025d.3)️,(025b.1)😊,(0259.0)😇,(0259.4)🙂,(0257.3)🙃,(0255.2)😉,(0022.0)⏎ > 🤣🥲☺️😊😇🙂🙃😉 +🤤,(0271.0)😪,(026f.2)😮,(026d.1)‍,(026d.4)💨,(026b.3)😵,(0269.2)😵,(0267.1)‍,(0267.4)💫,(0265.3)🤐,(0263.2)🥴,(0261.1)🤢,(0022.0)⏎ > 🤤😪😮‍💨😵😵‍💫🤐🥴🤢 +🤥,(027d.0)😶,(027b.1)🫥,(0279.0)😐,(0279.4)🫤,(0277.3)😑,(0275.2)🫨,(0273.1)😬,(0022.0)⏎ > 🤥😶🫥😐🫤😑🫨😬 +🤮,(0289.0)🤧,(0287.2)😷,(0285.1)🤒,(0283.0)🤕,(0283.4)🤑,(0281.3)🤠,(027f.2)😈,(0022.0)⏎ > 🤮🤧😷🤒🤕🤑🤠😈 
+🥵,(029f.0)🥶,(029d.1)😶,(029b.0)‍,(029b.3)🌫,(0299.2)️,(0297.0)😱,(0297.4)😨,(0295.3)😰,(0293.2)😥,(0291.1)😓,(0022.0)⏎ > 🥵🥶😶‍🌫️😱😨😰😥😓 +🥸,(02ab.0)🤩,(02a9.2)🥳,(02a7.0)😏,(02a7.4)😒,(02a5.3)😞,(02a3.2)😔,(02a1.1)😟,(0022.0)⏎ > 🥸🤩🥳😏😒😞😔😟" +`; + +exports[`optimization > optimize 'single word' [ 'optimization' ] > No optimization 1`] = `"o,(0003.0)p,(0005.0)t,(0007.0)i,(0009.0)m,(000b.0)i,(000d.0)z,(000f.0)a,(0011.0)t,(0013.0)i,(0015.0)o,(0017.0)n,(0002.0)⏎ > optimization"`; + +exports[`optimization > optimize 'single word' [ 'optimization' ] > With optimization 1`] = `"o,(0017.0)p,(0015.0)t,(0013.0)i,(0011.0)m,(000f.0)i,(000d.0)z,(000b.0)a,(0009.0)t,(0007.0)i,(0005.0)o,(0003.0)n,(0002.0)⏎ > optimization"`; + +exports[`optimization > optimize 'single word' [ 'optimization' ] > With optimization and string table 1`] = `"o,(0000.1)p,(0000.2)t,(0004.0)i,(0004.1)m,(0004.2)i,(0004.3)z,(0004.4)a,(0003.0)t,(0003.1)i,(0003.2)o,(0003.3)n,(0003.4)⏎ > optimization"`; diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.test.ts index 0163a100c672..84eaae4e3261 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.test.ts @@ -1,6 +1,8 @@ import { describe, expect, test } from 'vitest'; -import { createTrieBlob } from './createTrieBlob.ts'; +import { trieRootToITrieRoot } from '../TrieNode/trie.ts'; +import { createTrieRootFromList } from '../TrieNode/trie-util.ts'; +import { createTrieBlob, createTrieBlobFromITrieRoot, createTrieBlobFromTrieRoot } from './createTrieBlob.ts'; describe('FastTrieBlob', () => { const words = [ @@ -23,6 +25,24 @@ describe('FastTrieBlob', () => { test('createTrieBlob', () => { const trieBlob = createTrieBlob(words); - expect([...trieBlob.words()].sort()).toEqual([...words].sort()); + expect([...trieBlob.words()]).toEqual([...words].sort()); + }); + + test('createTrieBlobFromITrieRoot', () => { + const 
src = createTrieBlob(words); + const trieBlob = createTrieBlobFromITrieRoot(src.getRoot()); + expect([...trieBlob.words()]).toEqual([...words].sort()); + }); + + test('createTrieBlobFromTrieRoot', () => { + const src = createTrieRootFromList(words); + const trieBlob = createTrieBlobFromTrieRoot(src); + expect([...trieBlob.words()]).toEqual([...words].sort()); + }); + + test('createTrieBlobFromITrieRoot using trieRootToITrieRoot', () => { + const src = createTrieRootFromList(words); + const trieBlob = createTrieBlobFromITrieRoot(trieRootToITrieRoot(src)); + expect([...trieBlob.words()]).toEqual([...words].sort()); }); }); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts index c75484fa1a47..9324d5626150 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts @@ -1,17 +1,27 @@ +import type { BuildOptions } from '../BuildOptions.ts'; +import type { ITrieNodeRoot } from '../ITrieNode/index.ts'; import type { PartialTrieInfo } from '../ITrieNode/TrieInfo.ts'; import type { Trie } from '../trie.ts'; import type { TrieRoot } from '../TrieNode/TrieNode.ts'; import type { TrieBlob } from './TrieBlob.ts'; import { TrieBlobBuilder } from './TrieBlobBuilder.ts'; -export function createTrieBlob(words: readonly string[], options?: PartialTrieInfo): TrieBlob { - return TrieBlobBuilder.fromWordList(words, options); +export function createTrieBlob( + words: readonly string[], + options?: PartialTrieInfo, + buildOptions?: BuildOptions, +): TrieBlob { + return TrieBlobBuilder.fromWordList(words, options, buildOptions); } -export function createTrieBlobFromTrie(trie: Trie): TrieBlob { - return TrieBlobBuilder.fromTrieRoot(trie.root); +export function createTrieBlobFromTrie(trie: Trie, buildOptions?: BuildOptions): TrieBlob { + return createTrieBlobFromTrieRoot(trie.root, buildOptions); } -export function 
createTrieBlobFromTrieRoot(trie: TrieRoot): TrieBlob { - return TrieBlobBuilder.fromTrieRoot(trie); +export function createTrieBlobFromTrieRoot(trie: TrieRoot, buildOptions?: BuildOptions): TrieBlob { + return TrieBlobBuilder.fromTrieRoot(trie, buildOptions); +} + +export function createTrieBlobFromITrieRoot(trie: ITrieNodeRoot, buildOptions?: BuildOptions): TrieBlob { + return TrieBlobBuilder.fromITrieRoot(trie, buildOptions); } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/index.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/index.ts index a0c1ecc0f0e6..8dff74d21741 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/index.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/index.ts @@ -1,3 +1,9 @@ +export { + createTrieBlob, + createTrieBlobFromITrieRoot, + createTrieBlobFromTrie, + createTrieBlobFromTrieRoot, +} from './createTrieBlob.ts'; export { TrieBlob } from './TrieBlob.ts'; export { isBTrieData } from './TrieBlobEncoder.ts'; export { decodeBTrie, encodeTrieDataToBTrie } from './trieDataEncoder.ts'; diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/optimizeNodes.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/optimizeNodes.ts index b74abf680ab4..25f91b43de3a 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/optimizeNodes.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/optimizeNodes.ts @@ -8,12 +8,13 @@ import { type TrieBlobNode32, } from './TrieBlobFormat.ts'; +const MAX_AUTO_ADD_TO_STRING_TABLE = 4; + /** * Convert from a Trie to a DAWG by merging identical nodes. * @param nodes - the nodes to optimize. This array and the contents WILL BE CHANGED and used as a scratch space. * @returns the optimized nodes. */ - export function optimizeNodes(nodes: FastTrieBlobNodes32): FastTrieBlobNodes32 { /** the has map to look up locked nodes. 
*/ const nodeHashMap: Map = new Map(); @@ -191,6 +192,8 @@ function copyNodesAndStringTable(src: NodesAndStringTable): NodesAndStringTableB export function optimizeNodesWithStringTable(src: NodesAndStringTable): NodesAndStringTable { const { nodes, stringTableBuilder: builder } = copyNodesAndStringTable(src); + const multipleNodeRefs = calcHasMultipleReferences(nodes); + const multiStringRefs = new Set([0]); if (!builder.length) { // Add the empty string to take up index 0. @@ -208,6 +211,7 @@ export function optimizeNodesWithStringTable(src: NodesAndStringTable): NodesAnd function processNode(nodeIdx: number): void { const node = nodes[nodeIdx]; if (node.length !== 2) return; + const header = node[0]; // An end of word node cannot be merged with a prefix. if ((header & NodeHeaderEOWMask) !== 0) return; @@ -216,11 +220,18 @@ export function optimizeNodesWithStringTable(src: NodesAndStringTable): NodesAnd const childEntry = node[1]; const charByte = childEntry & NodeMaskCharByte; - const childNode = nodes[childEntry >>> 8]; + const childIdx = childEntry >>> 8; + // We cannot merge with a child node that has multiple references. 
+ if (multipleNodeRefs.has(childIdx)) return; + const childNode = nodes[childIdx]; const childHeader = childNode[0]; const childPrefixIdx = (childHeader & NodeHeaderPrefixMask) >>> NodeHeaderPrefixShift; const childBytes = builder.getEntry(childPrefixIdx) || []; + if (!multiStringRefs.has(childPrefixIdx)) { + multiStringRefs.add(childPrefixIdx); + if (childBytes.length >= MAX_AUTO_ADD_TO_STRING_TABLE) return; + } const prefixBytes = [charByte, ...childBytes]; const prefixIdx = builder.addStringBytes(prefixBytes); @@ -230,9 +241,34 @@ export function optimizeNodesWithStringTable(src: NodesAndStringTable): NodesAnd } } +function calcHasMultipleReferences(nodes: FastTrieBlobNodes32): Set { + const seen = new Set(); + const multiple = new Set(); + + walkNodes(nodes, 0, { + before: (nodeIdx) => { + if (seen.has(nodeIdx)) { + multiple.add(nodeIdx); + return true; + } + seen.add(nodeIdx); + return false; + }, + }); + + return multiple; +} + interface NodeWalkOptions { + /** + * @param nodeIdx + */ after?: (nodeIdx: number) => void; - before?: (nodeIdx: number) => void; + /** + * @param nodeIdx + * @returns true to stop going deeper. 
+ */ + before?: (nodeIdx: number) => boolean | undefined; } function walkNodes(nodes: FastTrieBlobNodes32, nodeIdx: number, options: NodeWalkOptions): void { @@ -240,7 +276,7 @@ function walkNodes(nodes: FastTrieBlobNodes32, nodeIdx: number, options: NodeWal const before = options.before || (() => undefined); function walk(nodeIdx: number): void { - before(nodeIdx); + if (before(nodeIdx)) return; const node = nodes[nodeIdx]; const count = node.length - 1; diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/trieDataEncoder.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/trieDataEncoder.ts index b51ab6e7e439..693803f30fd0 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/trieDataEncoder.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/trieDataEncoder.ts @@ -1,13 +1,15 @@ +import type { BuildOptions } from '../BuildOptions.ts'; import type { TrieData } from '../TrieData.ts'; import { TrieBlob } from './TrieBlob.ts'; import { TrieBlobBuilder } from './TrieBlobBuilder.ts'; -export function encodeTrieDataToBTrie(data: TrieData): Uint8Array { - if (data.encodeToBTrie) { +export function encodeTrieDataToBTrie(data: TrieData, buildOptions?: BuildOptions): Uint8Array { + const needToBuild = buildOptions?.optimize || buildOptions?.useStringTable; + if (!needToBuild && data.encodeToBTrie) { return data.encodeToBTrie(); } - const trie = TrieBlobBuilder.fromWordList(data.words(), data.info); + const trie = TrieBlobBuilder.fromITrieRoot(data.getRoot(), buildOptions); return trie.encodeToBTrie(); } diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts index c4254fab81b2..868fd5a3c2e8 100644 --- a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts +++ b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts @@ -148,6 +148,10 @@ class ImplITrieRoot extends ImplITrieNode implements ITrieNodeRoot { return this.root.stripCaseAndAccentsPrefix; } + get suggestionPrefix(): string { + return this.root.suggestionPrefix; + } + 
static toITrieNode(node: TrieRoot): ITrieNodeRoot { return new this(node); } diff --git a/packages/cspell-trie-lib/src/lib/buildITrie.ts b/packages/cspell-trie-lib/src/lib/buildITrie.ts index ac5b4582b34d..23fb2867aaa6 100644 --- a/packages/cspell-trie-lib/src/lib/buildITrie.ts +++ b/packages/cspell-trie-lib/src/lib/buildITrie.ts @@ -1,11 +1,16 @@ +import type { BuildOptions } from './BuildOptions.ts'; import type { ITrie } from './ITrie.ts'; import { ITrieImpl } from './ITrie.ts'; import type { PartialTrieInfo } from './ITrieNode/TrieInfo.ts'; import { TrieBlobBuilder } from './TrieBlob/TrieBlobBuilder.ts'; -export function buildITrieFromWords(words: Iterable, info: PartialTrieInfo = {}): ITrie { +export function buildITrieFromWords( + words: Iterable, + info: PartialTrieInfo = {}, + buildOptions?: BuildOptions, +): ITrie { const builder = new TrieBlobBuilder(info); builder.insert(words); - const tb = builder.build(); + const tb = builder.build(buildOptions); return new ITrieImpl(tb); } diff --git a/packages/cspell-trie-lib/src/lib/suggestions/genSuggestionsOptions.ts b/packages/cspell-trie-lib/src/lib/suggestions/genSuggestionsOptions.ts index d2d332540a87..9b795ca824fe 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/genSuggestionsOptions.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/genSuggestionsOptions.ts @@ -1,4 +1,5 @@ import type { WeightMap } from '../distance/index.ts'; +import { isDebuggerAttached } from '../utils/debugger.ts'; import { CompoundWordsMethod } from '../walker/index.ts'; export interface GenSuggestionOptionsStrict { @@ -75,7 +76,9 @@ export const defaultSuggestionOptions: SuggestionOptionsStrictRO = { ...defaultGenSuggestionOptions, numSuggestions: 8, includeTies: true, - timeout: 5000, + get timeout() { + return isDebuggerAttached() ? 
1_000_000 : 1000; + }, }; type KeyOfGenSuggestionOptionsStrict = keyof GenSuggestionOptionsStrict; diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts index 7706ef43cb3a..475aded9203a 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts @@ -1,7 +1,8 @@ import type { TrieCost, WeightMap } from '../distance/weightedMaps.ts'; -import type { ITrieNode, TrieOptionsRO } from '../ITrieNode/index.ts'; +import type { ITrieNode, ITrieNodeId, TrieOptionsRO } from '../ITrieNode/index.ts'; import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR } from '../ITrieNode/walker/index.ts'; import type { TrieData } from '../TrieData.ts'; +import { isDebuggerAttached } from '../utils/debugger.ts'; import { PairingHeap } from '../utils/PairingHeap.ts'; import { opCosts } from './constants.ts'; import type { SuggestionOptionsRO } from './genSuggestionsOptions.ts'; @@ -78,6 +79,7 @@ export function* getSuggestionsAStar( const compRoot = root.get(comp); const compRootIgnoreCase = rootIgnoreCase && rootIgnoreCase.get(comp); const emitted: Record = Object.create(null); + const debug = isDebuggerAttached(); const srcLetters = [...srcWord]; @@ -155,6 +157,14 @@ export function* getSuggestionsAStar( if (p.n.eow && p.i === len) { const word = pNodeToWord(p); const result = { word, cost: p.c }; + if (debug) { + console.log('add possible suggestion: %o', { + ...result, + nodes: pNodeToDbgInfo(p) + .map(({ id, s, c, a }) => `${a}{${s || '∅'}} $${c}-> ${id} `) + .join(''), + }); + } resultHeap.add(result); } @@ -191,10 +201,12 @@ export function* getSuggestionsAStar( // Replace for (const [ss, node] of n.entries()) { - if (node.id === m?.id || ss in sc) continue; + // Don't replace with self - skip + if (ss === s || ss in sc) continue; const g = visMap[ss] || 0; // srcWord === 'WALK' && console.log(g.toString(2)); const c = 
sg & g ? costVis : cost; + // console.log('replace %s (%s) -> %s (%s)', formatNodeId(n.id), s, formatNodeId(node.id), ss); storePath(t, node, i + 1, c, ss, p, 'r', ss); } @@ -340,6 +352,33 @@ function getCostTrie(t: RO, s: string) { return tt; } +interface DgbNodePathInfo { + id: ITrieNodeId; + s: string; + c: Cost; + // action taken + a: string; +} + +function pNodeToDbgInfo(p: RO): DgbNodePathInfo[] { + const parts: DgbNodePathInfo[] = []; + let n: RO | undefined = p; + while (n) { + const id = formatNodeId(n.n.id); + parts.push({ id, s: n.s, c: n.c, a: n.a || '' }); + n = n.p; + } + parts.reverse(); + return parts; +} + +function formatNodeId(id: ITrieNodeId): string { + const s = id.toString(16).padStart(16, '0'); + const upper = s.slice(0, 8).replace(/^0+/, '').padStart(4, '0'); + const lower = s.slice(8).replace(/^0+/, ''); + return `${upper}${lower ? '.' + lower : ''}`; +} + function pNodeToWord(p: RO): string { const parts: string[] = []; let n: RO | undefined = p; diff --git a/packages/cspell-trie-lib/src/lib/utils/debugger.ts b/packages/cspell-trie-lib/src/lib/utils/debugger.ts new file mode 100644 index 000000000000..91a4354dc1ef --- /dev/null +++ b/packages/cspell-trie-lib/src/lib/utils/debugger.ts @@ -0,0 +1,10 @@ +let debuggerIsAttached = false; + +export function setDebuggerAttached(attached: boolean): boolean { + debuggerIsAttached = attached; + return debuggerIsAttached; +} + +export function isDebuggerAttached(): boolean { + return debuggerIsAttached; +} diff --git a/packages/cspell-trie-lib/src/test/debugger.ts b/packages/cspell-trie-lib/src/test/debugger.ts new file mode 100644 index 000000000000..372d33349fa7 --- /dev/null +++ b/packages/cspell-trie-lib/src/test/debugger.ts @@ -0,0 +1,13 @@ +import inspector from 'node:inspector'; + +import { setDebuggerAttached } from '../lib/utils/debugger.ts'; + +/** + * Set the debug mode based on the inspector status or the provided value. 
+ * @param isDebugging - Optional boolean to explicitly set debug mode. + * @returns the current debug mode. + */ +export function registerDebugMode(isDebugging?: boolean): boolean { + isDebugging ??= !!inspector.url(); + return setDebuggerAttached(isDebugging); +} diff --git a/packages/cspell/src/__snapshots__/app.test.ts.snap b/packages/cspell/src/__snapshots__/app.test.ts.snap index 3155e240c187..121aa54d1397 100644 --- a/packages/cspell/src/__snapshots__/app.test.ts.snap +++ b/packages/cspell/src/__snapshots__/app.test.ts.snap @@ -2117,7 +2117,7 @@ exports[`Validate cli > app 'typos --no-show-suggestions' Expect Error: [Functio exports[`Validate cli > app 'typos --show-suggestions' Expect Error: [Function CheckFailed] 1`] = `[]`; exports[`Validate cli > app 'typos --show-suggestions' Expect Error: [Function CheckFailed] 2`] = ` -"log code.ts:1:26 - Unknown word (Orangges) Suggestions: [Oranges, orange, Orange, Orangs, Orange's] +"log code.ts:1:26 - Unknown word (Orangges) Suggestions: [Oranges, orange, Orange, Orangs, Orangey] log log test.md:5:3 - Forbidden word (blacklist) Suggestions: [denylist*, backlist, backlit, blackest, blackish] log diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b5b99b3b7708..f72c1717f5b0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -915,6 +915,9 @@ importers: '@cspell/cspell-pipe': specifier: workspace:* version: link:../cspell-pipe + '@cspell/dict-cpp': + specifier: ^7.0.2 + version: 7.0.2 '@cspell/dict-en_us': specifier: ^4.4.27 version: 4.4.27