streetsidesoftware
diff --git a/packages/cspell-trie-lib/api/api.d.ts b/packages/cspell-trie-lib/api/api.d.ts
Lines changed: 1 addition & 1 deletion
diff --git a/packages/cspell-trie-lib/package.json b/packages/cspell-trie-lib/package.json
Lines changed: 1 addition & 1 deletion
diff --git a/packages/cspell-trie-lib/perf/Utf8.perf.ts b/packages/cspell-trie-lib/perf/Utf8.perf.ts
Lines changed: 94 additions & 1 deletion
diff --git a/packages/cspell-trie-lib/perf/charIndex.perf.ts b/packages/cspell-trie-lib/perf/charIndex.perf.ts
Lines changed: 2 additions & 16 deletions
diff --git a/packages/cspell-trie-lib/perf/has.perf.ts b/packages/cspell-trie-lib/perf/has.perf.ts
Lines changed: 2 additions & 1 deletion
@@ -36,7 +36,7 @@
     "test:watch": "vitest",
     "test:perf": "pnpm test:perf:ts --all",
     "test:perf:ts": "insight --file \"**/*.perf.{mts,ts}\" -t 500",
-    "test:perf:prof": "NODE_ENV=production node --cpu-prof  ../../node_modules/perf-insight/bin.mjs -t 1000",
+    "test:perf:prof": "NODE_ENV=production node --cpu-prof --cpu-prof-interval=100  ../../node_modules/perf-insight/bin.mjs --file \"**/*.perf.{mts,ts}\" -t 5000",
     "perf": "pnpm test:perf",
     "test": "vitest run",
     "test:update-snapshot": "vitest run -u",
 
@@ -8,12 +8,14 @@ import {
     decodeUtf8N_LE,
     encodeCodePointsToUtf8Into,
     encodeTextToUtf8,
+    encodeTextToUtf8_32,
+    encodeTextToUtf8_32Into,
     encodeTextToUtf8Into,
     encodeUtf8N_BE,
     encodeUtf8N_LE,
     textToCodePoints,
 } from '../src/lib/TrieBlob/Utf8.ts';
-import { Utf8Encoder } from '../src/lib/TrieBlob/Utf8Encoder.ts';
+import { Utf8Encoder, Utf8Encoder2 } from '../src/lib/TrieBlob/Utf8Encoder.ts';
 
 const iterations = 1000;
 const text = sampleText();
@@ -23,6 +25,7 @@ suite('Utf8 encode', async (test) => {
     const encoder = new TextEncoder();
     const scratchBuffer = new Uint8Array(1024);
     const utf8Encoder = new Utf8Encoder();
+    const utf8Encoder2 = new Utf8Encoder2(1024);
 
     test(`TextEncoder.encodeInto words (${words.length})`, () => {
         const buffer = scratchBuffer;
@@ -124,6 +127,57 @@ suite('Utf8 encode', async (test) => {
         }
     });
 
+    test(`utf8Encoder2(word) to array words (${words.length})`, () => { // benchmark: Utf8Encoder2.encode called once per word
+        const _words = words; // local alias of the shared word list
+        for (let i = iterations; i > 0; --i) { // repeat the full pass `iterations` times
+            for (const word of _words) {
+                utf8Encoder2.encode(word); // return value is not used; only the call is exercised
+            }
+        }
+    });
+
+    test(`toUtf8Array(word) to array words (${words.length})`, () => { // benchmark: the local toUtf8Array helper defined in this file
+        const _words = words; // local alias of the shared word list
+        for (let i = iterations; i > 0; --i) { // repeat the full pass `iterations` times
+            for (const word of _words) {
+                toUtf8Array(word); // return value is not used; only the call is exercised
+            }
+        }
+    });
+
+    test(`toCodePoints(word) to array words (${words.length})`, () => { // benchmark: the local toCodePoints helper defined in this file
+        const _words = words; // local alias of the shared word list
+        for (let i = iterations; i > 0; --i) { // repeat the full pass `iterations` times
+            for (const word of _words) {
+                toCodePoints(word); // return value is not used; only the call is exercised
+            }
+        }
+    });
+
+    test(`encodeTextToUtf8_32Into(word) to array words (${words.length})`, () => { // benchmark: label names the function actually called below
+        const _words = words; // local alias of the shared word list
+        const buffer: number[] = new Array(100); // one output array shared by every call in this benchmark
+        for (let i = iterations; i > 0; --i) { // repeat the full pass `iterations` times
+            for (const word of _words) {
+                encodeTextToUtf8_32Into(word, buffer); // writes into `buffer`; return value is not used
+            }
+        }
+    });
+
+    test(`encodeTextToUtf8_32(word) to array words (${words.length})`, () => { // benchmark: encodeTextToUtf8_32 driven one call at a time by a cursor object
+        const _words = words; // local alias of the shared word list
+        const buffer: number[] = new Array(100); // one output array shared by every word in this benchmark
+        for (let i = iterations; i > 0; --i) { // repeat the full pass `iterations` times
+            for (const word of _words) {
+                const len = word.length; // UTF-16 length of the word, used as the cursor bound
+                let j = 0; // write index into buffer, restarted for each word
+                for (let p = { text: word, offset: 0 }; p.offset < len; ) { // no update clause: presumably encodeTextToUtf8_32 advances p.offset — TODO confirm in Utf8.ts
+                    buffer[j++] = encodeTextToUtf8_32(p); // store each returned value in call order
+                }
+            }
+        }
+    });
+
     test(`encoder.encode(word) to array words (${words.length})`, () => {
         const _words = words;
         for (let i = iterations; i > 0; --i) {
@@ -361,3 +415,42 @@ function sampleText() {
     `;
     // cspell:enable
 }
+
+const textEncoder = new TextEncoder();
+const charMap: Record<string, number> = Object.create(null);
+
+function encodeChar(char: string): number { // packs the UTF-8 bytes of `char` into one non-negative integer, first byte in the low 8 bits
+    const bytes = textEncoder.encode(char);
+    let code = 0;
+    for (let i = bytes.length - 1; i >= 0; i--) { // walk from the last byte to the first so bytes[0] ends up least significant
+        code = (code << 8) | bytes[i];
+    }
+    return code >>> 0; // `<<` and `|` yield signed 32-bit values; a 4-byte sequence sets bit 31, so reinterpret as unsigned
+}
+
+function toUtf8Array(text: string): number[] { // maps each element of [...text] to its encodeChar value, memoized in charMap
+    const src: string[] = [...text]; // spread uses the string iterator, which yields whole code points rather than UTF-16 units
+    const dst: number[] = src as unknown as number[]; // same array object as src: the strings are overwritten with numbers in place
+
+    for (let i = 0; i < src.length; i++) {
+        const char = src[i]; // read the string before dst[i] overwrites this slot
+        let code = charMap[char];
+        if (code === undefined) { // cache miss: encode once and remember the result
+            code = encodeChar(char);
+            charMap[char] = code;
+        }
+        dst[i] = code;
+    }
+    return dst;
+}
+
+function toCodePoints(text: string): number[] { // maps each element of [...text] to its numeric code point
+    const src: string[] = [...text]; // spread uses the string iterator, which yields whole code points rather than UTF-16 units
+    const dst: number[] = src as unknown as number[]; // same array object as src: the strings are overwritten with numbers in place
+
+    for (let i = 0; i < src.length; i++) {
+        const char = src[i]; // read the string before dst[i] overwrites this slot
+        dst[i] = char.codePointAt(0) || 0; // falls back to 0 when codePointAt returns undefined
+    }
+    return dst;
+}
@@ -1,5 +1,6 @@
 import { suite } from 'perf-insight';
 
+import { CharIndex } from '../src/lib/TrieBlob/CharIndex.ts';
 import { encodeTextToUtf8 } from '../src/lib/TrieBlob/Utf8.ts';
 import { readFastTrieBlobFromConfig, readTrieFromConfig } from '../src/test/dictionaries.test.helper.ts';
 
@@ -13,8 +14,7 @@ suite('encode to sequence', async (test) => {
     const words = await getWords();
     const msgSuffix = ' - ' + words.length + ' words';
     const fastTrieBlob = await getFastTrieBlob();
-    const trieBlob = fastTrieBlob.toTrieBlob();
-    const charIndex = trieBlob.charIndex;
+    const charIndex = CharIndex.fromIterable(words);
     const encoder = new TextEncoder();
 
     test('fastTrieBlob.wordToNodeCharIndexSequence' + msgSuffix, () => {
@@ -23,20 +23,6 @@ suite('encode to sequence', async (test) => {
         }
     });
 
-    test('trieBlob.wordToNodeCharIndexSequence' + msgSuffix, () => {
-        for (const word of words) {
-            trieBlob.wordToUtf8Seq(word);
-        }
-    });
-
-    test('trieBlob.wordToNodeCharIndexSequence x4' + msgSuffix, () => {
-        for (const word of words) {
-            for (let i = 0; i < 4; ++i) {
-                trieBlob.wordToUtf8Seq(word);
-            }
-        }
-    });
-
     test('charIndex.wordToCharIndexSequence' + msgSuffix, () => {
         for (const word of words) {
             charIndex.wordToUtf8Seq(word);
 
@@ -19,6 +19,7 @@ suite('trie has', async (test) => {
     const iTrieFast = new ITrieImpl(fastTrieBlob);
     const iTrieBlob = new ITrieImpl(trieBlob);
     const setOfWords = new Set(words);
+    console.log(`Number of words: ${words.length}`);
 
     test('set has words', () => {
         trieHasWords(setOfWords, words);
@@ -54,7 +55,7 @@ function _getFastTrieBlob() {
 }
 
 function trieHasWords(trie: { has: (word: string) => boolean }, words: string[]): boolean {
-    const has = (word: string) => trie.has(word);
+    const has = trie.has.bind(trie);
     const len = words.length;
     let success = true;
     for (let i = 0; i < len; ++i) {