diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 419dc73f..32605a11 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -57,7 +57,7 @@ jobs:
       matrix:
         config:
           - name: "Windows for x64"
-            os: windows-2019
+            os: windows-2022
             artifact: "win-x64"
           - name: "Windows for Arm"
             os: windows-2022
diff --git a/.vitepress/utils/ensureLocalImage.ts b/.vitepress/utils/ensureLocalImage.ts
index 47894cd1..cbba76a9 100644
--- a/.vitepress/utils/ensureLocalImage.ts
+++ b/.vitepress/utils/ensureLocalImage.ts
@@ -55,7 +55,7 @@ export async function ensureLocalImage(url: string, name: string, {
     if (resolvedImages.has(cacheKey))
         return resolvedImages.get(cacheKey)!;

-    return await withLock(cacheKey[0], cacheKey[1], async () => {
+    return await withLock([resolvedImages, ...cacheKey], async () => {
         if (resolvedImages.has(cacheKey))
             return resolvedImages.get(cacheKey)!;

@@ -185,7 +185,9 @@ function getFileExtension(format: keyof FormatEnum | undefined) {
 async function fetchWithRetry(url: string, retires: number = 5, waitTime: number = 1000 * 2) {
     for (let i = retires; i >= 0; i--) {
         try {
-            return await fetch(url);
+            return await fetch(url, {
+                redirect: "follow"
+            });
         } catch (err) {
             if (i === 0) {
                 console.error(`Failed to fetch image: ${url}`, err);
diff --git a/docs/guide/embedding.md b/docs/guide/embedding.md
index 6ce591a5..cf697e09 100644
--- a/docs/guide/embedding.md
+++ b/docs/guide/embedding.md
@@ -172,7 +172,7 @@ const documents = [
     "Cleaning the house is a good way to keep it tidy"
 ];

-const query = "Tell me a goegraphical fact";
+const query = "Tell me a nature geographical fact";
 const rankedDocuments = await context.rankAndSort(query, documents);

 const topDocument = rankedDocuments[0]!;
@@ -185,7 +185,7 @@ console.log("Ranked documents:", rankedDocuments);
 ```
 > This example will produce this output:
 > ```
-> query: Tell me a goegraphical fact
+> query: Tell me a nature geographical fact
 > Top document: Mount Everest is the tallest mountain in the world
 > Second document: The capital of France is Paris
 > ```
diff --git a/llama/addon/addon.cpp b/llama/addon/addon.cpp
index eef81c25..a01a987e 100644
--- a/llama/addon/addon.cpp
+++ b/llama/addon/addon.cpp
@@ -196,6 +196,36 @@ Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }

+Napi::Value addonSetNuma(const Napi::CallbackInfo& info) {
+    const bool numaDisabled = info.Length() == 0
+        ? true
+        : info[0].IsBoolean()
+            ? !info[0].As<Napi::Boolean>().Value()
+            : false;
+
+    if (numaDisabled)
+        return info.Env().Undefined();
+
+    const auto numaType = info[0].IsString()
+        ? info[0].As<Napi::String>().Utf8Value()
+        : "";
+
+    if (numaType == "distribute") {
+        llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
+    } else if (numaType == "isolate") {
+        llama_numa_init(GGML_NUMA_STRATEGY_ISOLATE);
+    } else if (numaType == "numactl") {
+        llama_numa_init(GGML_NUMA_STRATEGY_NUMACTL);
+    } else if (numaType == "mirror") {
+        llama_numa_init(GGML_NUMA_STRATEGY_MIRROR);
+    } else {
+        Napi::Error::New(info.Env(), std::string("Invalid NUMA strategy \"") + numaType + "\"").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return info.Env().Undefined();
+}
+
 Napi::Value addonInit(const Napi::CallbackInfo& info) {
     if (backendInitialized) {
         Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
@@ -255,6 +285,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
         Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
         Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
         Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
+        Napi::PropertyDescriptor::Function("setNuma", addonSetNuma),
         Napi::PropertyDescriptor::Function("init", addonInit),
         Napi::PropertyDescriptor::Function("dispose", addonDispose),
     });
diff --git a/package-lock.json b/package-lock.json
index 0e327ffe..719e8ef6 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,7 +10,7 @@
         "hasInstallScript": true,
         "license": "MIT",
         "dependencies": {
-            "@huggingface/jinja": "^0.5.0",
+            "@huggingface/jinja": "^0.5.1",
             "async-retry": "^1.3.3",
             "bytes": "^3.1.2",
             "chalk": "^5.4.1",
@@ -24,7 +24,7 @@
             "ignore": "^7.0.4",
             "ipull": "^3.9.2",
             "is-unicode-supported": "^2.1.0",
-            "lifecycle-utils": "^2.0.1",
+            "lifecycle-utils": "^3.0.1",
             "log-symbols": "^7.0.0",
             "nanoid": "^5.1.5",
             "node-addon-api": "^8.3.1",
@@ -70,7 +70,7 @@
             "@types/yargs": "^17.0.33",
             "@vitest/coverage-v8": "^3.1.3",
             "@vitest/ui": "^3.1.3",
-            "electron": "^36.2.0",
+            "electron": "^37.2.4",
             "eslint": "^9.26.0",
             "eslint-import-resolver-typescript": "^4.3.4",
             "eslint-plugin-import": "^2.31.0",
@@ -91,7 +91,7 @@
             "typescript-eslint": "^8.32.0",
             "vite-node": "^3.1.3",
             "vitepress": "^1.6.3",
-            "vitepress-plugin-llms": "https://pkg.pr.new/vitepress-plugin-llms@51",
+            "vitepress-plugin-llms": "^1.7.2",
             "vitest": "^3.1.3",
             "zx": "^8.5.4"
         },
@@ -1613,9 +1613,9 @@
         }
     },
     "node_modules/@huggingface/jinja": {
-        "version": "0.5.0",
-        "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.0.tgz",
-        "integrity": "sha512-Ptc03/jGRiYRoi0bUYKZ14MkDslsBRT24oxmsvUlfYrvQMldrxCevhPnT+hfX8awKTT8/f/0ZBBWldoeAcMHdQ==",
+        "version": "0.5.1",
+        "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.1.tgz",
+        "integrity": "sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==",
         "license": "MIT",
         "engines": {
             "node": ">=18"
@@ -2111,6 +2111,29 @@
             "url": "https://opencollective.com/libvips"
         }
     },
+    "node_modules/@isaacs/balanced-match": {
+        "version": "4.0.1",
+        "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
+        "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
+        "dev": true,
+        "license": "MIT",
+        "engines": {
+            "node": "20 || >=22"
+        }
+    },
+    "node_modules/@isaacs/brace-expansion": {
+        "version": "5.0.0",
+        "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
+        "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
+        "dev": true,
"license": "MIT", + "dependencies": { + "@isaacs/balanced-match": "^4.0.1" + }, + "engines": { + "node": "20 || >=22" + } + }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -7501,9 +7524,9 @@ "license": "MIT" }, "node_modules/electron": { - "version": "36.2.0", - "resolved": "https://registry.npmjs.org/electron/-/electron-36.2.0.tgz", - "integrity": "sha512-5yldoRjBKxPQfI0QMX+qq750o3Nl8N1SZnJqOPMq0gZ6rIJ+7y4ZLp808GrFwjfTm05TYgq3GSD8FGuKQZqwEw==", + "version": "37.2.4", + "resolved": "https://registry.npmjs.org/electron/-/electron-37.2.4.tgz", + "integrity": "sha512-F1WDDvY60TpFwGyW+evNB5q0Em8PamcDTVIKB2NaiaKEbNC2Fabn8Wyxy5g+Anirr1K40eKGjfSJhWEUbI1TOw==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -11548,9 +11571,9 @@ } }, "node_modules/lifecycle-utils": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/lifecycle-utils/-/lifecycle-utils-2.0.1.tgz", - "integrity": "sha512-jVso5WXIHfDL7Lf9sCRbLbPwgpoha5qUPgi+RMNVIMuOcb0nJ9Qr0r1OXbqLaxzBUQBhN8jYy92RLSk2OGJ6Cg==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/lifecycle-utils/-/lifecycle-utils-3.0.1.tgz", + "integrity": "sha512-Qt/Jl5dsNIsyCAZsHB6x3mbwHFn0HJbdmvF49sVX/bHgX2cW7+G+U+I67Zw+TPM1Sr21Gb2nfJMd2g6iUcI1EQ==", "license": "MIT" }, "node_modules/lines-and-columns": { @@ -19465,9 +19488,9 @@ } }, "node_modules/tokenx": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/tokenx/-/tokenx-1.0.1.tgz", - "integrity": "sha512-MhOngUHRuVE0CHP4cNEZ/XpdXETFL65nJpEvoTW+VYPuXsT/MTeNj+UNnekNsnxecmj2DEvUYPebqz+CsPTUSg==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/tokenx/-/tokenx-1.1.0.tgz", + "integrity": "sha512-KCjtiC2niPwTSuz4ktM82Ki5bjqBwYpssiHDsGr5BpejN/B3ksacRvrsdoxljdMIh2nCX78alnDkeemBmYUmTA==", "dev": true, "license": "MIT" }, @@ -20349,21 +20372,23 @@ } }, "node_modules/vitepress-plugin-llms": { - "version": "1.3.4", - "resolved": "https://pkg.pr.new/vitepress-plugin-llms@51", - "integrity": "sha512-FTyNYyx1jVbKae/raJLgDTgMaHSmY51B1nbokeC4KAhXMe413eGSexNIdvnCHXf9U1t92VlLajJ5S9E7adDoOQ==", + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/vitepress-plugin-llms/-/vitepress-plugin-llms-1.7.2.tgz", + "integrity": "sha512-4UxB3PXfRAfzbcKRXizRQajstjmYn1hoFOSCGIQBYyu3qYs9/TEAUe6oLGbiwaDD+wPQ/T1ow59pt2LAMR4/1A==", "dev": true, "license": "MIT", "dependencies": { "byte-size": "^9.0.1", "gray-matter": "^4.0.3", + "markdown-it": "^14.1.0", "markdown-title": "^1.0.2", "millify": "^6.1.0", - "minimatch": "^10.0.1", + "minimatch": "^10.0.3", + "path-to-regexp": "^8.2.0", "picocolors": "^1.1.1", "remark": "^15.0.1", "remark-frontmatter": "^5.0.0", - "tokenx": "^1.0.0", + "tokenx": "^1.1.0", "unist-util-remove": "^4.0.0", "unist-util-visit": "^5.0.0" }, @@ -20372,13 +20397,13 @@ } }, "node_modules/vitepress-plugin-llms/node_modules/minimatch": { - "version": "10.0.1", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.0.1.tgz", - "integrity": "sha512-ethXTt3SGGR+95gudmqJ1eNhRO7eGEGIgYA9vnPatK4/etz2MEVDno5GMCibdMTuBMyElzIlgxMna3K94XDIDQ==", + "version": "10.0.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.0.3.tgz", + "integrity": "sha512-IPZ167aShDZZUMdRk66cyQAW3qr0WzbHkPdMYa8bzZhlHhO3jALbKdxcaak7W9FfT2rZNpQuUu4Od7ILEpXSaw==", "dev": true, "license": "ISC", "dependencies": { - "brace-expansion": "^2.0.1" + "@isaacs/brace-expansion": "^5.0.0" }, "engines": { "node": "20 || >=22" diff --git a/package.json b/package.json index b63a6e46..bbaeabc5 
100644
--- a/package.json
+++ b/package.json
@@ -157,7 +157,7 @@
         "@types/yargs": "^17.0.33",
         "@vitest/coverage-v8": "^3.1.3",
         "@vitest/ui": "^3.1.3",
-        "electron": "^36.2.0",
+        "electron": "^37.2.4",
         "eslint": "^9.26.0",
         "eslint-import-resolver-typescript": "^4.3.4",
         "eslint-plugin-import": "^2.31.0",
@@ -178,12 +178,12 @@
         "typescript-eslint": "^8.32.0",
         "vite-node": "^3.1.3",
         "vitepress": "^1.6.3",
-        "vitepress-plugin-llms": "https://pkg.pr.new/vitepress-plugin-llms@51",
+        "vitepress-plugin-llms": "^1.7.2",
         "vitest": "^3.1.3",
         "zx": "^8.5.4"
     },
     "dependencies": {
-        "@huggingface/jinja": "^0.5.0",
+        "@huggingface/jinja": "^0.5.1",
         "async-retry": "^1.3.3",
         "bytes": "^3.1.2",
         "chalk": "^5.4.1",
@@ -197,7 +197,7 @@
         "ignore": "^7.0.4",
         "ipull": "^3.9.2",
         "is-unicode-supported": "^2.1.0",
-        "lifecycle-utils": "^2.0.1",
+        "lifecycle-utils": "^3.0.1",
         "log-symbols": "^7.0.0",
         "nanoid": "^5.1.5",
         "node-addon-api": "^8.3.1",
diff --git a/src/bindings/AddonTypes.ts b/src/bindings/AddonTypes.ts
index a2f06ae9..a1cbefc3 100644
--- a/src/bindings/AddonTypes.ts
+++ b/src/bindings/AddonTypes.ts
@@ -1,4 +1,5 @@
 import {Token} from "../types.js";
+import {LlamaNuma} from "./types.js";


 export type BindingModule = {
@@ -85,6 +86,7 @@ export type BindingModule = {
         total: number
     },
     init(): Promise<void>,
+    setNuma(numa?: LlamaNuma): void,
     loadBackends(forceLoadLibrariesSearchPath?: string): void,
     dispose(): Promise<void>
 };
@@ -159,7 +161,7 @@
 };

 export type BatchLogitIndex = number & {
-    __batchLogitIndex: never
+    readonly __batchLogitIndex: never
 };

 export type AddonGrammar = {
@@ -167,7 +169,7 @@
 };

 export type AddonGrammarEvaluationState = "AddonGrammarEvaluationState" & {
-    __brand: never
+    readonly __brand: never
 };

 export type AddonSampler = {
diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts
index 005e4a7a..243ad4ff 100644
--- a/src/bindings/Llama.ts
+++ b/src/bindings/Llama.ts
@@ -11,7 +11,7 @@ import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js";
 import {ThreadsSplitter} from "../utils/ThreadsSplitter.js";
 import {getLlamaClasses, LlamaClasses} from "../utils/getLlamaClasses.js";
 import {BindingModule} from "./AddonTypes.js";
-import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual} from "./types.js";
+import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual, LlamaNuma} from "./types.js";
 import {MemoryOrchestrator, MemoryReservation} from "./utils/MemoryOrchestrator.js";

 export const LlamaLogLevelToAddonLogLevel: ReadonlyMap<LlamaLogLevel, number> = new Map([
@@ -67,8 +67,8 @@ export class Llama {
     public readonly onDispose = new EventRelay<void>();

     private constructor({
-        bindings, bindingPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, buildGpu, maxThreads, vramOrchestrator,
-        vramPadding, ramOrchestrator, ramPadding, swapOrchestrator
+        bindings, bindingPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, numa, buildGpu, maxThreads,
+        vramOrchestrator, vramPadding, ramOrchestrator, ramPadding, swapOrchestrator
     }: {
         bindings: BindingModule,
         bindingPath: string,
@@ -81,6 +81,7 @@
             release: string
         },
         debug: boolean,
+        numa?: LlamaNuma,
         buildGpu: BuildGpu,
         maxThreads?: number,
         vramOrchestrator: MemoryOrchestrator,
@@ -110,6 +111,9 @@
         bindings.ensureGpuDeviceIsSupported();

+        if (numa != null && numa !== false)
+            bindings.setNuma(numa);
+
         this._gpu = bindings.getGpuType() ?? false;
         this._supportsGpuOffloading = bindings.getSupportsGpuOffloading();
         this._supportsMmap = bindings.getSupportsMmap();
@@ -328,7 +332,7 @@ export class Llama {
     public async loadModel(options: LlamaModelOptions) {
         this._ensureNotDisposed();

-        return await withLock(this._memoryLock, LlamaLocks.loadToMemory, options.loadSignal, async () => {
+        return await withLock([this._memoryLock, LlamaLocks.loadToMemory], options.loadSignal, async () => {
             this._ensureNotDisposed();

             const preventDisposalHandle = this._backendDisposeGuard.createPreventDisposalHandle();
@@ -468,7 +472,7 @@
     /** @internal */
     public static async _create({
         bindings, bindingPath, buildType, buildMetadata, logLevel, logger, vramPadding, ramPadding, maxThreads, skipLlamaInit = false,
-        debug
+        debug, numa
     }: {
         bindings: BindingModule,
         bindingPath: string,
@@ -480,7 +484,8 @@
         vramPadding: number | ((totalVram: number) => number),
         ramPadding: number | ((totalRam: number) => number),
         skipLlamaInit?: boolean,
-        debug: boolean
+        debug: boolean,
+        numa?: LlamaNuma
     }) {
         const vramOrchestrator = new MemoryOrchestrator(() => {
             const {total, used, unifiedSize} = bindings.getGpuVramInfo();
@@ -537,6 +542,7 @@
             logLevel,
             logger,
             debug,
+            numa,
             buildGpu: buildMetadata.buildOptions.gpu,
             vramOrchestrator,
             maxThreads,
@@ -643,6 +649,12 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLogLevel {
         return LlamaLogLevel.log;
     else if (level === LlamaLogLevel.warn && message.startsWith("make_cpu_buft_list: disabling extra buffer types"))
         return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_context: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_kv_cache_unified: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("init: embeddings required but some input tokens were not marked as outputs -> overriding"))
+        return LlamaLogLevel.info;

     return level;
 }
diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts
index 8ba71a22..3d6b85a9 100644
--- a/src/bindings/getLlama.ts
+++ b/src/bindings/getLlama.ts
@@ -16,7 +16,7 @@ import {
 } from "./utils/compileLLamaCpp.js";
 import {getLastBuildInfo} from "./utils/lastBuildInfo.js";
 import {getClonedLlamaCppRepoReleaseInfo, isLlamaCppRepoCloned} from "./utils/cloneLlamaCppRepo.js";
-import {BuildGpu, BuildMetadataFile, BuildOptions, LlamaGpuType, LlamaLogLevel} from "./types.js";
+import {BuildGpu, BuildMetadataFile, BuildOptions, LlamaGpuType, LlamaLogLevel, LlamaNuma} from "./types.js";
 import {BinaryPlatform, getPlatform} from "./utils/getPlatform.js";
 import {getBuildFolderNameForBuildOptions} from "./utils/getBuildFolderNameForBuildOptions.js";
 import {resolveCustomCmakeOptions} from "./utils/resolveCustomCmakeOptions.js";
@@ -171,7 +171,27 @@ export type LlamaOptions = {
      *
      * Defaults to `false`.
      */
-    dryRun?: boolean
+    dryRun?: boolean,
+
+    /**
+     * NUMA (Non-Uniform Memory Access) allocation policy.
+     *
+     * On multi-socket or multi-cluster machines, each CPU "socket" (or node) has its own local memory.
+     * Accessing memory on your own socket is fast, but accessing memory on another socket is slower.
+     * Setting a NUMA allocation policy can dramatically improve performance
+     * by keeping data local and "close" to the socket.
+     *
+     * These are the available NUMA options:
+     * - **`false`**: Don't set any NUMA policy - let the OS decide.
+     * - **`"distribute"`**: Distribute the memory across all available NUMA nodes.
+     * - **`"isolate"`**: Pin both threads and their memory to a single NUMA node to avoid cross-node traffic.
+     * - **`"numactl"`**: Delegate NUMA management to the external `numactl` command (or `libnuma` library) to set the NUMA policy.
+     * - **`"mirror"`**: Allocate memory on all NUMA nodes, and copy the data to all of them.
+     *   This ensures minimal traffic between nodes, but uses more memory.
+     *
+     * Defaults to `false` (no NUMA policy).
+     */
+    numa?: LlamaNuma
 };

 export type LastBuildOptions = {
@@ -261,7 +281,27 @@
      *
      * Defaults to `false`.
      */
-    dryRun?: boolean
+    dryRun?: boolean,
+
+    /**
+     * NUMA (Non-Uniform Memory Access) allocation policy.
+     *
+     * On multi-socket or multi-cluster machines, each CPU "socket" (or node) has its own local memory.
+     * Accessing memory on your own socket is fast, but accessing memory on another socket is slower.
+     * Setting a NUMA allocation policy can dramatically improve performance
+     * by keeping data local and "close" to the socket.
+     *
+     * These are the available NUMA options:
+     * - **`false`**: Don't set any NUMA policy - let the OS decide.
+     * - **`"distribute"`**: Distribute the memory across all available NUMA nodes.
+     * - **`"isolate"`**: Pin both threads and their memory to a single NUMA node to avoid cross-node traffic.
+     * - **`"numactl"`**: Delegate NUMA management to the external `numactl` command (or `libnuma` library) to set the NUMA policy.
+     * - **`"mirror"`**: Allocate memory on all NUMA nodes, and copy the data to all of them.
+     *   This ensures minimal traffic between nodes, but uses more memory.
+     *
+     * Defaults to `false` (no NUMA policy).
+     */
+    numa?: LlamaNuma
 };

 export const getLlamaFunctionName = "getLlama";
@@ -319,6 +359,7 @@ export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOptions?: LastBuildOptions) {
         vramPadding: lastBuildOptions?.vramPadding ?? defaultLlamaVramPadding,
         ramPadding: lastBuildOptions?.ramPadding ?? defaultLlamaRamPadding,
         debug: lastBuildOptions?.debug ?? defaultLlamaCppDebugMode,
+        numa: lastBuildOptions?.numa,
         dryRun
     };
@@ -346,6 +387,7 @@
             vramPadding: lastBuildOptions?.vramPadding ?? defaultLlamaVramPadding,
             ramPadding: lastBuildOptions?.ramPadding ?? defaultLlamaRamPadding,
             debug: lastBuildOptions?.debug ?? defaultLlamaCppDebugMode,
+            numa: lastBuildOptions?.numa,
             skipLlamaInit: dryRun
         });
@@ -380,6 +422,7 @@ export async function getLlamaForOptions({
     vramPadding = defaultLlamaVramPadding,
     ramPadding = defaultLlamaRamPadding,
     debug = defaultLlamaCppDebugMode,
+    numa = false,
     dryRun = false
 }: LlamaOptions, {
     updateLastBuildInfoOnCompile = false,
@@ -453,6 +496,7 @@
                 vramPadding,
                 ramPadding,
                 debug,
+                numa,
                 dryRun
             });
         } catch (err) {
@@ -470,6 +514,7 @@
                 vramPadding,
                 ramPadding,
                 debug,
+                numa,
                 dryRun
             });
         }
@@ -516,6 +561,7 @@
                 : null
             ),
             debug,
+            numa,
             pipeBinaryTestErrorLogs
         });
@@ -577,7 +623,8 @@
             vramPadding,
             ramPadding,
             skipLlamaInit,
-            debug
+            debug,
+            numa
         });
     } catch (err) {
         console.error(
@@ -622,6 +669,7 @@ async function loadExistingLlamaBinary({
     ramPadding,
     fallbackMessage,
     debug,
+    numa,
     pipeBinaryTestErrorLogs
 }: {
     buildOptions: BuildOptions,
@@ -638,6 +686,7 @@
     ramPadding: Required<LlamaOptions>["ramPadding"],
     fallbackMessage: string | null,
     debug: boolean,
+    numa?: LlamaNuma,
     pipeBinaryTestErrorLogs: boolean
 }) {
     const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
@@ -674,7 +723,8 @@
                 vramPadding,
                 ramPadding,
                 skipLlamaInit,
-                debug
+                debug,
+                numa
             });
         } else if (progressLogs) {
             console.warn(
@@ -733,7 +783,8 @@
                 vramPadding,
                 ramPadding,
                 skipLlamaInit,
-                debug
+                debug,
+                numa
             });
         } else if (progressLogs) {
             const binaryDescription = describeBinary({
@@ -788,7 +839,8 @@ async function buildAndLoadLlamaBinary({
     vramPadding,
     ramPadding,
     skipLlamaInit,
-    debug
+    debug,
+    numa
 }: {
     buildOptions: BuildOptions,
     skipDownload: boolean,
@@ -799,7 +851,8 @@
     vramPadding: Required<LlamaOptions>["vramPadding"],
     ramPadding: Required<LlamaOptions>["ramPadding"],
     skipLlamaInit: boolean,
-    debug: boolean
+    debug: boolean,
+    numa?: LlamaNuma
 }) {
     const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
@@ -833,7 +886,8 @@
         vramPadding,
         ramPadding,
         skipLlamaInit,
-        debug
+        debug,
+        numa
     });
 }
diff --git a/src/bindings/types.ts b/src/bindings/types.ts
index 4adfd86f..7748772c 100644
--- a/src/bindings/types.ts
+++ b/src/bindings/types.ts
@@ -22,6 +22,7 @@ export type BuildOptions = {
         release: string
     }
 };
+export type LlamaNuma = false | "distribute" | "isolate" | "numactl" | "mirror";

 export type BuildOptionsJSON = Omit<BuildOptions, "customCmakeOptions"> & {
     customCmakeOptions: Record<string, string>
diff --git a/src/bindings/utils/getLlamaWithoutBackend.ts b/src/bindings/utils/getLlamaWithoutBackend.ts
index 6a64d59f..992cdf6a 100644
--- a/src/bindings/utils/getLlamaWithoutBackend.ts
+++ b/src/bindings/utils/getLlamaWithoutBackend.ts
@@ -12,7 +12,7 @@ export async function getLlamaWithoutBackend() {
     if (sharedLlamaWithoutBackend != null)
        return sharedLlamaWithoutBackend;

-    return await withLock(getLlamaWithoutBackend, "loadAddon", async () => {
+    return await withLock([getLlamaWithoutBackend, "loadAddon"], async () => {
        if (sharedLlamaWithoutBackend != null)
            return sharedLlamaWithoutBackend;
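Throughout this diff, `withLock`, `acquireLock`, `isLockActive`, and `waitForLockRelease` calls migrate from separate `(scope, key)` arguments to a single `[scope, key]` array, matching the `lifecycle-utils` v2 → v3 bump in `package.json`. A minimal sketch of the new call shape, inferred from the call sites in this diff (the exact v3 signatures are an assumption based on these usages, not on the library's docs):

```ts
import {withLock, acquireLock} from "lifecycle-utils";

const scope = {}; // any object identity can anchor a lock scope

// lifecycle-utils v2: withLock(scope, "key", callback)
// lifecycle-utils v3: the scope and key move into a single array
const result = await withLock([scope, "key"], async () => {
    // only one callback at a time runs for the same [scope, "key"] pair
    return 42;
});
console.log(result);

// an optional AbortSignal can be passed between the array and the callback,
// as seen in the Llama.loadModel and LlamaChat call sites in this diff:
// await withLock([scope, "key"], abortSignal, async () => { /* ... */ });

// acquireLock mirrors the same shape when the lock must outlive a callback
const lock = await acquireLock([scope, "key"]);
try {
    // ...critical section...
} finally {
    lock.dispose(); // matches the evaluatorLock.dispose() usage below
}
```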
diff --git a/src/cli/commands/inspect/commands/InspectGpuCommand.ts b/src/cli/commands/inspect/commands/InspectGpuCommand.ts
index 59502963..51caa14c 100644
--- a/src/cli/commands/inspect/commands/InspectGpuCommand.ts
+++ b/src/cli/commands/inspect/commands/InspectGpuCommand.ts
@@ -8,12 +8,14 @@
 import {BuildGpu, LlamaLogLevel} from "../../../../bindings/types.js";
 import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
 import {getModuleVersion} from "../../../../utils/getModuleVersion.js";
 import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
-import {documentationPageUrls} from "../../../../config.js";
+import {builtinLlamaCppGitHubRepo, documentationPageUrls} from "../../../../config.js";
 import {Llama} from "../../../../bindings/Llama.js";
 import {getPlatformInfo} from "../../../../bindings/utils/getPlatformInfo.js";
 import {getLinuxDistroInfo} from "../../../../bindings/utils/getLinuxDistroInfo.js";
 import {isRunningUnderRosetta} from "../../../utils/isRunningUnderRosetta.js";
 import {toBytes} from "../../../utils/toBytes.js";
+import {getBinariesGithubRelease} from "../../../../bindings/utils/binariesGithubRelease.js";
+import {getClonedLlamaCppRepoReleaseInfo} from "../../../../bindings/utils/cloneLlamaCppRepo.js";

 type InspectGpuCommand = {
     // no options for now
@@ -74,8 +76,33 @@ export const InspectGpuCommand: CommandModule<object, InspectGpuCommand> = {
         try {
             const moduleVersion = await getModuleVersion();

-            if (moduleVersion != null)
+            if (moduleVersion != null) {
+                console.info();
                 console.info(`${chalk.yellow("node-llama-cpp:")} ${moduleVersion}`);
+            }
+        } catch (err) {
+            // do nothing
+        }
+
+        try {
+            const prebuiltBinariesRelease = await getBinariesGithubRelease();
+
+            console.info(`${chalk.yellow("Prebuilt binaries:")} ${prebuiltBinariesRelease}`);
+        } catch (err) {
+            // do nothing
+        }
+
+        try {
+            const clonedLlamaCppRelease = await getClonedLlamaCppRepoReleaseInfo();
+
+            if (clonedLlamaCppRelease != null)
+                console.info(
+                    `${chalk.yellow("Cloned source:")} ${clonedLlamaCppRelease.tag}` + (
+                        clonedLlamaCppRelease.llamaCppGithubRepo !== builtinLlamaCppGitHubRepo
+                            ? ` (${clonedLlamaCppRelease.llamaCppGithubRepo})`
+                            : ""
+                    )
+                );
         } catch (err) {
             // do nothing
         }
diff --git a/src/evaluator/LlamaChat/LlamaChat.ts b/src/evaluator/LlamaChat/LlamaChat.ts
index 628e220c..ddbfbcec 100644
--- a/src/evaluator/LlamaChat/LlamaChat.ts
+++ b/src/evaluator/LlamaChat/LlamaChat.ts
@@ -603,7 +603,7 @@ export class LlamaChat {
         if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
             throw new Error("Using both grammar and functions is not supported yet");

-        return await withLock(this._chatLock, "evaluate", signal, async (): Promise<LlamaChatResponse<Functions>> => {
+        return await withLock([this._chatLock, "evaluate"], signal, async (): Promise<LlamaChatResponse<Functions>> => {
             try {
                 generateResponseState.ensureLastHistoryItemIsModel();
                 generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
@@ -801,7 +801,7 @@
             }
         );

-        return await withLock(this._chatLock, "evaluate", signal, async (): Promise<LlamaChatLoadAndCompleteUserResponse> => {
+        return await withLock([this._chatLock, "evaluate"], signal, async (): Promise<LlamaChatLoadAndCompleteUserResponse> => {
             try {
                 generateResponseState.ensureLastHistoryItemIsUser();
diff --git a/src/evaluator/LlamaChatSession/LlamaChatSession.ts b/src/evaluator/LlamaChatSession/LlamaChatSession.ts
index f0a0ba77..183b6729 100644
--- a/src/evaluator/LlamaChatSession/LlamaChatSession.ts
+++ b/src/evaluator/LlamaChatSession/LlamaChatSession.ts
@@ -525,7 +525,7 @@ export class LlamaChatSession {
             throw new Error("The LlamaGrammar passed to this function was created with a different Llama instance than the one used by this sequence's model. 
Make sure you use the same Llama instance for both the model and the grammar."); this._stopAllPreloadAndPromptCompletions(); - return await withLock(this._chatLock, "evaluation", signal, async () => { + return await withLock([this._chatLock, "evaluation"], signal, async () => { this._ensureNotDisposed(); this._stopAllPreloadAndPromptCompletions(); @@ -856,7 +856,7 @@ export class LlamaChatSession { this._preloadAndCompleteAbortControllers.add(abortController); try { - return await withLock(this._chatLock, "evaluation", abortController.signal, async () => { + return await withLock([this._chatLock, "evaluation"], abortController.signal, async () => { this._ensureNotDisposed(); if (this._chat == null) diff --git a/src/evaluator/LlamaCompletion.ts b/src/evaluator/LlamaCompletion.ts index 37446682..3b500472 100644 --- a/src/evaluator/LlamaCompletion.ts +++ b/src/evaluator/LlamaCompletion.ts @@ -302,7 +302,7 @@ export class LlamaCompletion { throw new DisposedError(); }; - return await withLock(this, "generateCompletion", signal, async () => { + return await withLock([this as LlamaCompletion, "generateCompletion"], signal, async () => { ensureNotAborted(); if (this._sequence == null || this.disposed) @@ -503,7 +503,7 @@ export class LlamaCompletion { throw new DisposedError(); }; - return await withLock(this, "generateCompletion", signal, async () => { + return await withLock([this as LlamaCompletion, "generateCompletion"], signal, async () => { ensureNotAborted(); if (this._sequence == null || this.disposed) diff --git a/src/evaluator/LlamaContext/LlamaContext.ts b/src/evaluator/LlamaContext/LlamaContext.ts index 8a5cff98..974f7865 100644 --- a/src/evaluator/LlamaContext/LlamaContext.ts +++ b/src/evaluator/LlamaContext/LlamaContext.ts @@ -315,7 +315,7 @@ export class LlamaContext { this._batchDispatchPending = true; - void withLock(this, "context", async () => { + void withLock([this as LlamaContext, "context"], async () => { this._currentDispatchBatchHandle = {}; this._dispatchDecodeScheduled = false; this._batchDispatchPending = false; @@ -589,7 +589,7 @@ export class LlamaContext { let decodeLock: Lock | undefined; // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel if (this._llama.gpu === "vulkan") - decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode"); + decodeLock = await acquireLock([decodeSyncWorkaround.vulkanLock, "decode"]); try { await decodeTokenBatchItems(currentBatchItems, currentBatchSize); @@ -653,7 +653,7 @@ export class LlamaContext { if (this._disposed) return; - void withLock(this, "context", async () => { + void withLock([this as LlamaContext, "context"], async () => { if (this._disposed) return; @@ -1215,7 +1215,7 @@ export class LlamaContextSequence { let awaitPromise: Promise | undefined; - await withLock(this._context, "context", async () => { + await withLock([this._context, "context"], async () => { this._ensureNotDisposed(); if (ranges.length === 0) @@ -1555,7 +1555,7 @@ export class LlamaContextSequence { return item; }); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); try { return await this._decodeTokens( resolvedTokens, @@ -1588,7 +1588,7 @@ export class LlamaContextSequence { tokenBias: sampleOptions.tokenBias }); - return await withLock(sampler, "sample", async () => { + return await withLock([sampler, "sample"], async () => { if (sampler.disposed) return undefined; @@ -1623,7 +1623,7 @@ 
export class LlamaContextSequence { ); } finally { evaluatorLock.dispose(); - void withLock(sampler, "sample", sampler.asyncDispose); + void withLock([sampler, "sample"], sampler.asyncDispose); } } @@ -1638,8 +1638,8 @@ export class LlamaContextSequence { const resolvedPath = path.resolve(process.cwd(), filePath); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); - const contextLock = await acquireLock(this._context, "context"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); + const contextLock = await acquireLock([this._context, "context"]); try { this._ensureNotDisposed(); @@ -1681,8 +1681,8 @@ export class LlamaContextSequence { const resolvedPath = path.resolve(process.cwd(), filePath); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); - const contextLock = await acquireLock(this._context, "context"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); + const contextLock = await acquireLock([this._context, "context"]); try { this._ensureNotDisposed(); @@ -1757,7 +1757,7 @@ export class LlamaContextSequence { this._ensureNotDisposed(); const evaluatorLock = _skipLock ? undefined - : await acquireLock(this._lock, "evaluate"); + : await acquireLock([this._lock, "evaluate"]); let nextToken: Token | -1 | null | undefined; const yieldRes: Partial> = {}; @@ -1789,7 +1789,7 @@ export class LlamaContextSequence { tokenBias }); - return withLock(sampler, "sample", async () => { + return withLock([sampler, "sample"], async () => { if (sampler.disposed) return null; @@ -1847,7 +1847,7 @@ export class LlamaContextSequence { evalTokens = [nextToken]; } } finally { - void withLock(sampler, "sample", sampler.asyncDispose); + void withLock([sampler, "sample"], sampler.asyncDispose); } } @@ -1895,7 +1895,7 @@ export class LlamaContextSequence { try { while (true) { this._ensureNotDisposed(); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); let nextToken: Token | undefined; const yieldRes: Partial> = {}; @@ -2019,7 +2019,7 @@ export class LlamaContextSequence { tokenBias }); - return withLock(sampler, "sample", async () => { + return withLock([sampler, "sample"], async () => { if (sampler.disposed) return null; @@ -2112,7 +2112,7 @@ export class LlamaContextSequence { logitsArray[logitsStartIndex] = true; } } finally { - void withLock(sampler, "sample", sampler.asyncDispose); + void withLock([sampler, "sample"], sampler.asyncDispose); if (this._tokenPredictorOwner === tokenPredictorOwner) tokenPredictor.stop(); diff --git a/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts b/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts index d20e3522..18ff71f7 100644 --- a/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts +++ b/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts @@ -119,7 +119,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { targetSequence.context._ctx.ensureDraftContextIsCompatibleForSpeculative(this._draftSequence.context._ctx); try { - await withLock(this, "evaluate", currentAbortSignal, async () => { + await withLock([this as DraftSequenceTokenPredictor, "evaluate"], currentAbortSignal, async () => { this._stateTokens = stateTokens.slice(); this._pendingEvalTokens = []; this._predictedTokens = []; @@ -157,7 +157,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { const grammarEvaluationStateOption = 
this._evaluateOptions.grammarEvaluationState instanceof Function ? this._evaluateOptions.grammarEvaluationState()?.clone() : this._evaluateOptions.grammarEvaluationState?.clone(); - void withLock(this, "pushTokens", async () => { + void withLock([this as DraftSequenceTokenPredictor, "pushTokens"], async () => { this._grammarEvaluationStateOption = grammarEvaluationStateOption; const tokensToPush = tokens.slice(); @@ -226,7 +226,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { if (untilPredictionsExhausted) this._waitForPredictionExhaustion = true; - void withLock(this, "evaluate", async () => { + void withLock([this as DraftSequenceTokenPredictor, "evaluate"], async () => { this._iterator?.return(); this._iterator = undefined; }); @@ -238,7 +238,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { this._resetAbortController.abort(); this._currentEvaluationAbortController.abort(); - void withLock(this, "evaluate", async () => { + void withLock([this as DraftSequenceTokenPredictor, "evaluate"], async () => { this._iterator?.return(); this._iterator = undefined; }); @@ -255,7 +255,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { return; this._active = true; - void withLock(this, "evaluate", async () => { + void withLock([this as DraftSequenceTokenPredictor, "evaluate"], async () => { try { const abortSignal = this._currentEvaluationAbortController.signal; diff --git a/src/evaluator/LlamaEmbeddingContext.ts b/src/evaluator/LlamaEmbeddingContext.ts index 0e8695bc..cd91d747 100644 --- a/src/evaluator/LlamaEmbeddingContext.ts +++ b/src/evaluator/LlamaEmbeddingContext.ts @@ -96,7 +96,7 @@ export class LlamaEmbeddingContext { if (endToken != null && resolvedInput.at(-1) !== endToken) resolvedInput.push(endToken); - return await withLock(this, "evaluate", async () => { + return await withLock([this as LlamaEmbeddingContext, "evaluate"], async () => { await this._sequence.eraseContextTokenRanges([{ start: 0, end: this._sequence.nextTokenIndex diff --git a/src/evaluator/LlamaModel/LlamaModel.ts b/src/evaluator/LlamaModel/LlamaModel.ts index f53ab21a..9d7fa343 100644 --- a/src/evaluator/LlamaModel/LlamaModel.ts +++ b/src/evaluator/LlamaModel/LlamaModel.ts @@ -537,7 +537,7 @@ export class LlamaModel { if (this._vocabOnly) throw new Error("Model is loaded in vocabOnly mode, so no context can be created"); - return await withLock(this._llama._memoryLock, LlamaLocks.loadToMemory, options.createSignal, async () => { + return await withLock([this._llama._memoryLock, LlamaLocks.loadToMemory], options.createSignal, async () => { const preventDisposalHandle = this._backendModelDisposeGuard.createPreventDisposalHandle(); try { return await LlamaContext._create(options, {_model: this}); @@ -673,7 +673,7 @@ export class LlamaModel { if (this._loraAdapters.has(resolvedPath)) return this._loraAdapters.get(resolvedPath)!; - return await withLock(this._loraAdapters, "modify", async () => { + return await withLock([this._loraAdapters, "modify"], async () => { if (this._loraAdapters.has(resolvedPath)) return this._loraAdapters.get(resolvedPath)!; @@ -687,7 +687,7 @@ export class LlamaModel { /** @internal */ public async _removeLoraUsage(loraAdapters: Set) { - return await withLock(this._loraAdapters, "modify", async () => { + return await withLock([this._loraAdapters, "modify"], async () => { await Promise.all( [...loraAdapters].map(async (lora) => { lora.usages--; diff --git a/src/evaluator/LlamaRankingContext.ts b/src/evaluator/LlamaRankingContext.ts index 
8a32251e..71ee32e9 100644
--- a/src/evaluator/LlamaRankingContext.ts
+++ b/src/evaluator/LlamaRankingContext.ts
@@ -185,7 +185,7 @@ export class LlamaRankingContext {
         if (input.length === 0)
             return Promise.resolve(0);

-        return withLock(this, "evaluate", async () => {
+        return withLock([this as LlamaRankingContext, "evaluate"], async () => {
             await this._sequence.eraseContextTokenRanges([{
                 start: 0,
                 end: this._sequence.nextTokenIndex
diff --git a/src/gguf/fileReaders/GgufFsFileReader.ts b/src/gguf/fileReaders/GgufFsFileReader.ts
index d6200b6b..9080083f 100644
--- a/src/gguf/fileReaders/GgufFsFileReader.ts
+++ b/src/gguf/fileReaders/GgufFsFileReader.ts
@@ -52,7 +52,7 @@ export class GgufFsFileReader extends GgufFileReader {
     }

     private async _readToExpandBufferUpToOffset(endOffset: number, extraAllocationSize: number = defaultExtraAllocationSize) {
-        return await withLock(this, "modifyBuffer", this._signal, async () => {
+        return await withLock([this as GgufFsFileReader, "modifyBuffer"], this._signal, async () => {
             if (endOffset < this._buffer.length)
                 return;
diff --git a/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts b/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts
index 2fd2eb6f..11113420 100644
--- a/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts
+++ b/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts
@@ -66,7 +66,7 @@ export class GgufNetworkFetchFileReader extends GgufFileReader {
     }

     private async _fetchToExpandBufferUpToOffset(endOffset: number, extraAllocationSize: number = defaultExtraAllocationSize) {
-        await withLock(this, "modifyBuffer", this._signal, async () => {
+        await withLock([this as GgufNetworkFetchFileReader, "modifyBuffer"], this._signal, async () => {
             if (endOffset < this._buffer.length)
                 return;
diff --git a/src/index.ts b/src/index.ts
index 6605c331..2332caaa 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,7 +4,7 @@ import {getLlama, type LlamaOptions, type LastBuildOptions} from "./bindings/getLlama.js";
 import {getLlamaGpuTypes} from "./bindings/utils/getLlamaGpuTypes.js";
 import {NoBinaryFoundError} from "./bindings/utils/NoBinaryFoundError.js";
 import {
-    type LlamaGpuType, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType
+    type LlamaGpuType, type LlamaNuma, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType
 } from "./bindings/types.js";
 import {resolveModelFile, type ResolveModelFileOptions} from "./utils/resolveModelFile.js";
 import {LlamaModel, LlamaModelInfillTokens, type LlamaModelOptions, LlamaModelTokens} from "./evaluator/LlamaModel/LlamaModel.js";
@@ -124,6 +124,7 @@ export {
     type LlamaOptions,
     type LastBuildOptions,
     type LlamaGpuType,
+    type LlamaNuma,
     type LlamaClasses,
     LlamaLogLevel,
     NoBinaryFoundError,
diff --git a/src/utils/LruCache.ts b/src/utils/LruCache.ts
index 9e224cd3..6d6b40a2 100644
--- a/src/utils/LruCache.ts
+++ b/src/utils/LruCache.ts
@@ -12,7 +12,7 @@ export class LruCache<Key, Value> {
         this._onDelete = onDelete;
     }

-    public get(key: Key) {
+    public get(key: Key): Value | undefined {
         if (!this._cache.has(key))
             return undefined;
diff --git a/src/utils/ReplHistory.ts b/src/utils/ReplHistory.ts
index 11be485a..194c1d88 100644
--- a/src/utils/ReplHistory.ts
+++ b/src/utils/ReplHistory.ts
@@ -25,7 +25,7 @@ export class ReplHistory {
             return;
         }

-        await withLock(this, "file", async () => {
+        await withLock([this as ReplHistory, "file"], async () => {
             try {
                 const json = parseReplJsonfile(await fs.readJSON(this._filePath!));
                 this._fileContent = this._addItemToHistory(line, json);
diff --git a/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts b/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts
index a78cd82a..817279ef 100644
--- a/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts
+++ b/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts
@@ -199,5 +199,8 @@ export function getGbnfJsonTerminalForGbnfJsonSchema(
     if (isGbnfJsonBasicSchemaIncludesType(schema, "null"))
         terminals.push(new GbnfNull());

+    if (terminals.length === 0)
+        terminals.push(new GbnfNull());
+
     return new GbnfOr(terminals);
 }
diff --git a/src/utils/isLockfileActive.ts b/src/utils/isLockfileActive.ts
index 8aae5645..f9b36b7a 100644
--- a/src/utils/isLockfileActive.ts
+++ b/src/utils/isLockfileActive.ts
@@ -7,12 +7,12 @@ export async function isLockfileActive({
 }: {
     resourcePath: string, staleDuration?: number
 }) {
-    if (isLockActive(lockfileLockScope, resourcePath))
+    if (isLockActive([lockfileLockScope, resourcePath]))
         return true;

     const lockfileActive = await lockfile.check(resourcePath, {stale: staleDuration, realpath: false});
     if (lockfileActive)
         return true;

-    return isLockActive(lockfileLockScope, resourcePath);
+    return isLockActive([lockfileLockScope, resourcePath]);
 }
diff --git a/src/utils/utilTypes.ts b/src/utils/utilTypes.ts
index b7b96191..53211fd5 100644
--- a/src/utils/utilTypes.ts
+++ b/src/utils/utilTypes.ts
@@ -2,6 +2,16 @@ export type Writable<T> = {
     -readonly [P in keyof T]: T[P];
 };

+/**
+ * Omit all the keys from `Value` that are not set to `true` in `Options`.
+ *
+ * For example:
+ * ```ts
+ * type Value = {a: number, b: string, c: boolean};
+ * type Options = {a: true, b: false, c: true};
+ * type Result = PickOptions<Value, Options>; // {a: number, c: boolean}
+ * ```
+ */
 export type PickOptions<
     Value extends Readonly<Record<string, any>>,
     Options extends {readonly [key: string]: boolean | undefined}
diff --git a/src/utils/waitForLockfileRelease.ts b/src/utils/waitForLockfileRelease.ts
index 4cf385ae..47581bee 100644
--- a/src/utils/waitForLockfileRelease.ts
+++ b/src/utils/waitForLockfileRelease.ts
@@ -8,13 +8,13 @@ export async function waitForLockfileRelease({
 }: {
     resourcePath: string, checkInterval?: number, staleDuration?: number
 }) {
     while (true) {
-        if (isLockActive(lockfileLockScope, resourcePath)) {
-            await waitForLockRelease(lockfileLockScope, resourcePath);
+        if (isLockActive([lockfileLockScope, resourcePath])) {
+            await waitForLockRelease([lockfileLockScope, resourcePath]);
             continue;
         }

         const lockfileActive = await lockfile.check(resourcePath, {stale: staleDuration, realpath: false});
-        const lockIsActive = isLockActive(lockfileLockScope, resourcePath);
+        const lockIsActive = isLockActive([lockfileLockScope, resourcePath]);
         if (lockIsActive)
             continue;
diff --git a/src/utils/withLockfile.ts b/src/utils/withLockfile.ts
index f5a962da..09edf3cb 100644
--- a/src/utils/withLockfile.ts
+++ b/src/utils/withLockfile.ts
@@ -12,7 +12,7 @@ export async function withLockfile<T>(
     },
     callback: () => T | Promise<T>
 ): Promise<T> {
-    return await withLock(lockfileLockScope, resourcePath, async () => {
+    return await withLock([lockfileLockScope, resourcePath], async () => {
         let releaseLock: () => Promise<void>;
         let res: T;
diff --git a/templates/electron-typescript-react/electron/state/llmState.ts b/templates/electron-typescript-react/electron/state/llmState.ts
index 7aee4647..060e89bb 100644
--- a/templates/electron-typescript-react/electron/state/llmState.ts
+++ b/templates/electron-typescript-react/electron/state/llmState.ts
@@ -95,7 +95,7 @@ let 
inProgressResponse: SimplifiedModelChatItem["message"] = []; export const llmFunctions = { async loadLlama() { - await withLock(llmFunctions, "llama", async () => { + await withLock([llmFunctions, "llama"], async () => { if (llama != null) { try { await llama.dispose(); @@ -136,7 +136,7 @@ export const llmFunctions = { }); }, async loadModel(modelPath: string) { - await withLock(llmFunctions, "model", async () => { + await withLock([llmFunctions, "model"], async () => { if (llama == null) throw new Error("Llama not loaded"); @@ -198,7 +198,7 @@ export const llmFunctions = { }); }, async createContext() { - await withLock(llmFunctions, "context", async () => { + await withLock([llmFunctions, "context"], async () => { if (model == null) throw new Error("Model not loaded"); @@ -242,7 +242,7 @@ export const llmFunctions = { }); }, async createContextSequence() { - await withLock(llmFunctions, "contextSequence", async () => { + await withLock([llmFunctions, "contextSequence"], async () => { if (context == null) throw new Error("Context not loaded"); @@ -278,7 +278,7 @@ export const llmFunctions = { }, chatSession: { async createChatSession() { - await withLock(llmFunctions, "chatSession", async () => { + await withLock([llmFunctions, "chatSession"], async () => { if (contextSequence == null) throw new Error("Context sequence not loaded"); @@ -336,7 +336,7 @@ export const llmFunctions = { }); }, async prompt(message: string) { - await withLock(llmFunctions, "chatSession", async () => { + await withLock([llmFunctions, "chatSession"], async () => { if (chatSession == null) throw new Error("Chat session not loaded"); diff --git a/templates/electron-typescript-react/package.json b/templates/electron-typescript-react/package.json index df1afcf1..bac000cd 100644 --- a/templates/electron-typescript-react/package.json +++ b/templates/electron-typescript-react/package.json @@ -26,7 +26,7 @@ "birpc": "^2.3.0", "classnames": "^2.5.1", "highlight.js": "^11.11.1", - "lifecycle-utils": "^2.0.0", + "lifecycle-utils": "^3.0.1", "markdown-it": "^14.1.0", "node-llama-cpp": "file:../..", "pretty-ms": "^9.2.0", diff --git a/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx b/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx index 5ec1d9fc..9d978cf5 100644 --- a/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx +++ b/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx @@ -25,7 +25,7 @@ export function UpdateBadge({appVersion, canShowCurrentVersion}: UpdateBadgeProp const updateLatestVersionInfo = useCallback(async () => { clearTimeout(nextUpdateTimeoutRef.current); - await withLock(instanceLock.current, "updateVersion", async () => { + await withLock([instanceLock.current, "updateVersion"], async () => { clearTimeout(nextUpdateTimeoutRef.current); const latestVersion = await getLatestAvailableVersion(appVersionIsBeta ?? 
false); diff --git a/test/modelDependent/bgeReranker/rank.test.ts b/test/modelDependent/bgeReranker/rank.test.ts index bfc90c33..b82db45e 100644 --- a/test/modelDependent/bgeReranker/rank.test.ts +++ b/test/modelDependent/bgeReranker/rank.test.ts @@ -28,7 +28,7 @@ describe("bgeReranker", () => { "Cleaning the house is a good way to keep it tidy" ]; - const query = "Tell me a geographical fact"; + const query = "Tell me a nature geographical fact"; const ranks = await Promise.all( documents.map((doc) => rankingContext.rank(query, doc)) @@ -40,19 +40,19 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.01798620996209156"); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.0024726231566347743"); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ - 0.000013674009084599736, - 0.000013674009084599736, - 0.000013674009084599736, - 0.003684239899435989, - 0.000013674009084599736, - 0.01798620996209156, - 0.000013674009084599736, - 0.000013674009084599736, 0.00002039908727992137, - 0.000013674009084599736, + 0.00006772414961977023, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00003716893710288947, + 0.0024726231566347743, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00002039908727992137, + 0.00003716893710288947, ] `); }); @@ -81,7 +81,7 @@ describe("bgeReranker", () => { "Cleaning the house is a good way to keep it tidy" ]; - const query = "Tell me a geographical fact"; + const query = "Tell me a nature geographical fact"; const ranks = await rankingContext.rankAll(query, documents); @@ -91,19 +91,19 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.01798620996209156"); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.0024726231566347743"); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ - 0.000013674009084599736, - 0.000013674009084599736, - 0.000013674009084599736, - 0.003684239899435989, - 0.000013674009084599736, - 0.01798620996209156, - 0.000013674009084599736, - 0.000013674009084599736, 0.00002039908727992137, - 0.000013674009084599736, + 0.00006772414961977023, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00003716893710288947, + 0.0024726231566347743, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00002039908727992137, + 0.00003716893710288947, ] `); }); @@ -130,7 +130,7 @@ describe("bgeReranker", () => { "Cleaning the house is a good way to keep it tidy" ]; - const query = "Tell me a geographical fact"; + const query = "Tell me a nature geographical fact"; const rankedDocuments = await rankingContext.rankAndSort(query, documents); @@ -141,42 +141,42 @@ describe("bgeReranker", () => { expect(simplifySortedRanks([topDocument])[0]).toMatchInlineSnapshot(` { "document": "Mount Everest is the tallest mountain in the world", - "score": 0.01798620996209156, + "score": 0.0024726231566347743, } `); expect(simplifySortedRanks(rankedDocuments)).toMatchInlineSnapshot(` [ { "document": "Mount Everest is the tallest mountain in the world", - "score": 0.01798620996209156, + "score": 0.0024726231566347743, }, { - "document": "The capital of France is Paris", - "score": 0.003684239899435989, + "document": "I love eating pizza 
with extra cheese", + "score": 0.00006772414961977023, }, { - "document": "Not all the things that shine are made of gold", - "score": 0.00002039908727992137, + "document": "The capital of France is Paris", + "score": 0.00003716893710288947, }, { - "document": "I love eating pizza with extra cheese", - "score": 0.000013674009084599736, + "document": "Dogs love to play fetch with their owners", + "score": 0.00003716893710288947, }, { - "document": "Dogs love to play fetch with their owners", - "score": 0.000013674009084599736, + "document": "Cleaning the house is a good way to keep it tidy", + "score": 0.00003716893710288947, }, { - "document": "The sky is clear and blue today", - "score": 0.000013674009084599736, + "document": "A warm cup of tea is perfect for a cold winter day", + "score": 0.00003716893710288947, }, { - "document": "Cleaning the house is a good way to keep it tidy", - "score": 0.000013674009084599736, + "document": "Not all the things that shine are made of gold", + "score": 0.00002039908727992137, }, { - "document": "A warm cup of tea is perfect for a cold winter day", - "score": 0.000013674009084599736, + "document": "The sky is clear and blue today", + "score": 0.00002039908727992137, }, ] `); @@ -196,7 +196,7 @@ function simplifySortedRanks { contextSequence: contextSequence2 }); - const res2 = await chatSession2.prompt("How much is 6+6+6"); + const res2 = await chatSession2.prompt("How much is 6+6+6?"); const tokenMeterState2 = contextSequence2.tokenMeter.getState(); expect(tokenMeterState2).to.toMatchInlineSnapshot(` { - "usedInputTokens": 82, - "usedOutputTokens": 14, + "usedInputTokens": 83, + "usedOutputTokens": 11, } `); expect(tokenMeterState2.usedInputTokens).to.be.greaterThanOrEqual(tokenMeterState.usedInputTokens); - expect(res2).to.eql("The sum of 6+6+6 is 18."); + expect(res2).to.eql("6 + 6 + 6 = 18"); }); test("reusing a context sequences utilizes existing state", {timeout: 1000 * 60 * 60 * 2}, async () => { diff --git a/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts b/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts index d8247dd9..3ccf0673 100644 --- a/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts +++ b/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts @@ -114,7 +114,7 @@ describe("functionary", () => { freeRam: s1GB * 6 }); expect(res.gpuLayers).to.eql(0); - expect(res.contextSize).to.toMatchInlineSnapshot("7718"); + expect(res.contextSize).to.toMatchInlineSnapshot("7717"); } { const res = await resolveGpuLayers(0, { @@ -255,7 +255,7 @@ describe("functionary", () => { freeRam: s1GB * 4.5 }); expect(res.gpuLayers).to.eql(16); - expect(res.contextSize).to.toMatchInlineSnapshot("4016"); + expect(res.contextSize).to.toMatchInlineSnapshot("4011"); } try { await resolveGpuLayers(16, { @@ -318,7 +318,7 @@ describe("functionary", () => { ignoreMemorySafetyChecks: true }); expect(res.gpuLayers).to.eql(0); - expect(res.contextSize).to.toMatchInlineSnapshot("7718"); + expect(res.contextSize).to.toMatchInlineSnapshot("7717"); } }); @@ -343,7 +343,7 @@ describe("functionary", () => { unifiedMemorySize: s1GB * 7.3 }); expect(res.gpuLayers).to.eql(16); - expect(res.contextSize).to.toMatchInlineSnapshot("1760"); + expect(res.contextSize).to.toMatchInlineSnapshot("1757"); } { const res = await resolveGpuLayers(16, { @@ -354,7 +354,7 @@ describe("functionary", () => { unifiedMemorySize: s1GB * 5.3 }); expect(res.gpuLayers).to.eql(16); - 
-            expect(res.contextSize).to.toMatchInlineSnapshot("5505");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5502");
         }
         try {
             await resolveGpuLayers(16, {
@@ -783,7 +783,7 @@ describe("functionary", () => {
                 llamaGpu: false
             });
             expect(res.gpuLayers).to.eql(0);
-            expect(res.contextSize).to.toMatchInlineSnapshot("7718");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7717");
         }
         {
             const res = await resolveGpuLayers(33, {
@@ -795,7 +795,7 @@ describe("functionary", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(0);
-            expect(res.contextSize).to.toMatchInlineSnapshot("7718");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7717");
         }
     });

@@ -809,7 +809,7 @@ describe("functionary", () => {
                 unifiedMemorySize: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("6251");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6248");
         }
         {
             const res = await resolveGpuLayers(33, {
@@ -820,7 +820,7 @@ describe("functionary", () => {
                 unifiedMemorySize: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("2974");
+            expect(res.contextSize).to.toMatchInlineSnapshot("2972");
         }
         {
             const res = await resolveGpuLayers(33, {
@@ -831,7 +831,7 @@ describe("functionary", () => {
                 unifiedMemorySize: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("1336");
+            expect(res.contextSize).to.toMatchInlineSnapshot("1333");
         }
         try {
             await resolveGpuLayers(33, {
@@ -908,7 +908,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 1
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("472");
+            expect(res.contextSize).to.toMatchInlineSnapshot("471");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -918,7 +918,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 1
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("898");
+            expect(res.contextSize).to.toMatchInlineSnapshot("895");
         }
     });

@@ -962,7 +962,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1062,7 +1062,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4721");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4718");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1072,7 +1072,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7998");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7995");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1125,7 +1125,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1225,7 +1225,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4721");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4718");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1235,7 +1235,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7998");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7995");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1349,7 +1349,7 @@ describe("functionary", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4016");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4011");
         }
     });

@@ -1451,7 +1451,7 @@ describe("functionary", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4016");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4011");
         }
     });
 });
@@ -1480,7 +1480,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-            expect(res.contextSize).to.toMatchInlineSnapshot("6535");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6531");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -1492,7 +1492,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -1569,7 +1569,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 7
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-            expect(res.contextSize).to.toMatchInlineSnapshot("6535");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6531");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -1581,7 +1581,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 7
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
diff --git a/test/modelDependent/functionary/gguf/ggufInsights.test.ts b/test/modelDependent/functionary/gguf/ggufInsights.test.ts
index ee193e2c..b7f22d33 100644
--- a/test/modelDependent/functionary/gguf/ggufInsights.test.ts
+++ b/test/modelDependent/functionary/gguf/ggufInsights.test.ts
@@ -92,7 +92,7 @@ describe("gguf", async () => {

         const modelVramUsageDiff = currentModelVramUsage - initialModelVramUsage;

-        const s300MB = 300 * Math.pow(1024, 2);
+        const s330MB = 330 * Math.pow(1024, 2);
         const s5MB = 5 * Math.pow(1024, 2);

         const estimatedModelResourceUsage = ggufInsights.estimateModelResourceRequirements({
@@ -100,7 +100,7 @@ describe("gguf", async () => {
         });
         expect(toBytes(estimatedModelResourceUsage.gpuVram)).toMatchInlineSnapshot('"4.06GB"');
         expect(toBytes(estimatedModelResourceUsage.cpuRam)).toMatchInlineSnapshot('"281.81MB"');
-        expect(Math.abs(modelVramUsageDiff - estimatedModelResourceUsage.gpuVram)).to.be.lte(s300MB);
+        expect(Math.abs(modelVramUsageDiff - estimatedModelResourceUsage.gpuVram)).to.be.lte(s330MB);

         const modelEstimationDiffWithActual = estimatedModelResourceUsage.gpuVram + estimatedModelResourceUsage.cpuRam - model.size;
         expect(Math.abs(modelEstimationDiffWithActual)).to.be.lte(s5MB); // tolerate such a small difference
@@ -125,7 +125,7 @@ describe("gguf", async () => {
             modelGpuLayers: ggufInsights.totalLayers
         }).gpuVram;
         expect(toBytes(estimatedContextVramUsage)).toMatchInlineSnapshot("\"1.03GB\"");
-        expect(Math.abs(contextVramUsageDiff - estimatedContextVramUsage)).to.be.lte(s300MB);
+        expect(Math.abs(contextVramUsageDiff - estimatedContextVramUsage)).to.be.lte(s330MB);

         await model.dispose();
     });
@@ -190,7 +190,7 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "643.07MB",
+            "cpuRam": "643.08MB",
            "gpuVram": "0B",
          }
        `);
@@ -201,7 +201,7 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "451.07MB",
+            "cpuRam": "451.08MB",
            "gpuVram": "0B",
          }
        `);
@@ -214,7 +214,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "1.71GB",
-            "gpuVram": "355.25MB",
+            "gpuVram": "355.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -224,8 +224,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "1002.8MB",
-            "gpuVram": "315.25MB",
+            "cpuRam": "1002.82MB",
+            "gpuVram": "315.75MB",
          }
        `);
@@ -235,8 +235,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "630.8MB",
-            "gpuVram": "295.25MB",
+            "cpuRam": "630.82MB",
+            "gpuVram": "295.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -246,8 +246,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "444.8MB",
-            "gpuVram": "285.25MB",
+            "cpuRam": "444.82MB",
+            "gpuVram": "285.75MB",
          }
        `);

@@ -258,7 +258,7 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "1022.78MB",
+            "cpuRam": "1022.79MB",
            "gpuVram": "1.05GB",
          }
        `);
@@ -269,8 +269,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "638.78MB",
-            "gpuVram": "679.25MB",
+            "cpuRam": "638.79MB",
+            "gpuVram": "679.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -280,8 +280,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "446.78MB",
-            "gpuVram": "479.25MB",
+            "cpuRam": "446.79MB",
+            "gpuVram": "479.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -291,8 +291,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "350.78MB",
-            "gpuVram": "379.25MB",
+            "cpuRam": "350.79MB",
+            "gpuVram": "379.75MB",
          }
        `);

@@ -326,7 +326,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "667.52MB",
+            "gpuVram": "668.02MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -337,7 +337,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "475.52MB",
+            "gpuVram": "476.02MB",
          }
        `);

@@ -371,7 +371,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "667.52MB",
+            "gpuVram": "668.02MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -382,7 +382,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "475.52MB",
+            "gpuVram": "476.02MB",
          }
        `);
     });
diff --git a/test/modelDependent/llama3.1/chunkDocument.test.ts b/test/modelDependent/llama3.1/chunkDocument.test.ts
index 4484b3c2..6af8a6f0 100644
--- a/test/modelDependent/llama3.1/chunkDocument.test.ts
+++ b/test/modelDependent/llama3.1/chunkDocument.test.ts
@@ -28,7 +28,7 @@ const exampleParagraph = [

 describe("llama 3.1", () => {
     describe("chunk document", () => {
-        test("DraftModelTokenPredictor", {timeout: 1000 * 60 * 60 * 2}, async () => {
+        test("basic usage", {timeout: 1000 * 60 * 60 * 2}, async () => {
             const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
             const llama = await getTestLlama();

diff --git a/test/modelDependent/llama3.1/controlledEvaluate.test.ts b/test/modelDependent/llama3.1/controlledEvaluate.test.ts
index c9dcde17..3145c84f 100644
--- a/test/modelDependent/llama3.1/controlledEvaluate.test.ts
+++ b/test/modelDependent/llama3.1/controlledEvaluate.test.ts
@@ -96,73 +96,73 @@ describe("llama 3.1", () => {
            {
              "next": {
                "probabilities": Map {
-                  35308 => 0.5214946,
-                  27096 => 0.2432059,
-                  11 => 0.0221824,
-                  198 => 0.0119446,
-                  374 => 0.0083614,
-                  863 => 0.0083608,
-                  1131 => 0.0068347,
-                  25 => 0.0062433,
-                  7940 => 0.0054039,
-                  1 => 0.0051688,
+                  35308 => 0.5214539,
+                  27096 => 0.2432189,
+                  11 => 0.0221867,
+                  198 => 0.0119489,
+                  374 => 0.0083635,
+                  863 => 0.0083618,
+                  1131 => 0.0068354,
+                  25 => 0.0062467,
+                  7940 => 0.0054025,
+                  320 => 0.0051706,
                },
              },
            },
            {
              "next": {
                "probabilities": Map {
-                  927 => 0.9811904,
-                  198 => 0.0033849,
-                  6288 => 0.0032705,
+                  927 => 0.9811952,
+                  198 => 0.0033833,
+                  6288 => 0.00327,
                  279 => 0.0006553,
-                  1633 => 0.0003184,
-                  1035 => 0.0003114,
-                  13 => 0.0002917,
-                  264 => 0.0002895,
+                  1633 => 0.0003185,
+                  1035 => 0.0003111,
+                  13 => 0.0002916,
+                  264 => 0.0002894,
                  297 => 0.0002833,
-                  720 => 0.000249,
+                  720 => 0.0002489,
                },
                "token": 927,
              },
            },
            {
              "next": {
-                "confidence": 0.9306729,
+                "confidence": 0.930688,
                "token": 279,
              },
            },
            {
              "next": {
-                "confidence": 0.9597685,
+                "confidence": 0.9597747,
                "probabilities": Map {
-                  16053 => 0.9597685,
-                  1208 => 0.0047506,
-                  198 => 0.0031827,
-                  5679 => 0.0029162,
-                  65536 => 0.0019724,
-                  6435 => 0.0009124,
-                  2697 => 0.0006706,
+                  16053 => 0.9597747,
+                  1208 => 0.0047502,
+                  198 => 0.0031807,
+                  5679 => 0.0029171,
+                  65536 => 0.0019718,
+                  6435 => 0.0009126,
+                  2697 => 0.0006707,
                  720 => 0.0005979,
-                  21811 => 0.0005517,
-                  45363 => 0.0005495,
+                  21811 => 0.0005516,
+                  45363 => 0.0005494,
                },
              },
            },
            {
              "next": {
-                "confidence": 0.987146,
+                "confidence": 0.9871562,
                "probabilities": Map {
-                  5679 => 0.987146,
-                  21811 => 0.0014387,
-                  198 => 0.0009368,
-                  8415 => 0.0007225,
-                  12875 => 0.0003803,
-                  4194 => 0.000347,
-                  720 => 0.0002815,
-                  14588 => 0.0002761,
-                  9522 => 0.0002417,
-                  627 => 0.0002042,
+                  5679 => 0.9871562,
+                  21811 => 0.0014367,
+                  198 => 0.0009356,
+                  8415 => 0.0007227,
+                  12875 => 0.0003802,
+                  4194 => 0.0003468,
+                  720 => 0.0002813,
+                  14588 => 0.000276,
+                  9522 => 0.0002415,
+                  627 => 0.0002041,
                },
                "token": 5679,
              },
diff --git a/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts b/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
index 2bc659f5..d8142a4b 100644
--- a/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
+++ b/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
@@ -98,45 +98,45 @@ describe("llama 3.1", () => {
          [
            {
              "probabilities": Map {
-                578 => 0.4307292,
-                1115 => 0.1304377,
-                1102 => 0.0516853,
-                763 => 0.042889,
-                1283 => 0.029397,
-                2100 => 0.0293787,
-                15636 => 0.0262684,
-                2030 => 0.021849,
-                320 => 0.016903,
-                1628 => 0.0118695,
+                578 => 0.4307095,
+                1115 => 0.1304636,
+                1102 => 0.0516819,
+                763 => 0.0428933,
+                1283 => 0.0293915,
+                2100 => 0.0293782,
+                15636 => 0.0262626,
+                2030 => 0.0218519,
+                320 => 0.0169018,
+                1628 => 0.0118644,
              },
              "token": 578,
            },
            {
              "probabilities": Map {
-                16053 => 0.4222992,
-                4062 => 0.3036339,
-                39935 => 0.0603973,
-                2944 => 0.0373043,
-                5679 => 0.0238118,
-                11914 => 0.0162981,
-                2144 => 0.0146835,
-                1121 => 0.0069849,
-                17571 => 0.0057944,
-                3446 => 0.0049346,
+                16053 => 0.4223687,
+                4062 => 0.303549,
+                39935 => 0.0603321,
+                2944 => 0.0373496,
+                5679 => 0.0237923,
+                11914 => 0.0163001,
+                2144 => 0.0146822,
+                1121 => 0.0069893,
+                17571 => 0.0057973,
+                3446 => 0.0049349,
              },
              "token": 16053,
            },
            {
              "probabilities": Map {
-                5679 => 0.9981185,
-                12875 => 0.0001592,
+                5679 => 0.9981177,
+                12875 => 0.0001593,
                18964 => 0.0001154,
-                39935 => 0.000115,
-                13 => 0.0001049,
+                39935 => 0.0001149,
+                13 => 0.000105,
                627 => 0.0000928,
-                656 => 0.0000625,
+                656 => 0.0000626,
                893 => 0.0000563,
-                198 => 0.0000522,
+                198 => 0.0000523,
                374 => 0.0000519,
              },
              "token": 5679,
@@ -144,56 +144,56 @@ describe("llama 3.1", () => {
            {
              "probabilities": Map {
                374 => 0.8126541,
-                1587 => 0.0481526,
-                596 => 0.0247368,
-                1120 => 0.0223041,
-                3250 => 0.0215465,
-                706 => 0.0161833,
-                15849 => 0.0086943,
-                1053 => 0.0059125,
-                55064 => 0.0037811,
+                1587 => 0.0481505,
+                596 => 0.0247274,
+                1120 => 0.022311,
+                3250 => 0.0215521,
+                706 => 0.0161821,
+                15849 => 0.0086956,
+                1053 => 0.0059156,
+                55064 => 0.0037815,
                11 => 0.0036657,
              },
              "token": 374,
            },
            {
              "probabilities": Map {
-                2288 => 0.2757553,
-                1120 => 0.1666547,
-                539 => 0.1577473,
-                779 => 0.133445,
-                264 => 0.0558533,
-                1101 => 0.0292142,
-                16053 => 0.0176781,
-                5042 => 0.015864,
-                1193 => 0.014582,
-                2744 => 0.0140904,
+                2288 => 0.2758818,
+                1120 => 0.1666409,
+                539 => 0.1577165,
+                779 => 0.1333762,
+                264 => 0.0558459,
+                1101 => 0.029207,
+                16053 => 0.0176698,
+                5042 => 0.0158617,
+                1193 => 0.0145808,
+                2744 => 0.0140919,
              },
              "token": 2288,
            },
            {
              "probabilities": Map {
-                16053 => 0.9065909,
-                13326 => 0.0636439,
-                19781 => 0.007158,
-                17551 => 0.0020244,
-                10968 => 0.0012683,
-                11920 => 0.0011008,
-                6435 => 0.0010087,
-                34386 => 0.0007758,
-                1208 => 0.0006099,
-                25366 => 0.0005672,
+                16053 => 0.9066046,
+                13326 => 0.0636245,
+                19781 => 0.007155,
+                17551 => 0.0020255,
+                10968 => 0.0012684,
+                11920 => 0.001101,
+                6435 => 0.001009,
+                34386 => 0.0007755,
+                1208 => 0.00061,
+                25366 => 0.0005675,
              },
              "token": 16053,
            },
            {
              "probabilities": Map {
-                311 => 0.988279,
-                1524 => 0.0061858,
-                11 => 0.0025774,
+                311 => 0.9882814,
+                1524 => 0.0061828,
+                11 => 0.0025772,
                323 => 0.0005243,
                13 => 0.0003535,
-                627 => 0.0003211,
+                627 => 0.0003212,
                1606 => 0.0002642,
                2288 => 0.0002583,
                369 => 0.0001247,
@@ -203,46 +203,46 @@
            },
            {
              "probabilities": Map {
-                2512 => 0.7492248,
-                1524 => 0.0989405,
-                656 => 0.032411,
-                636 => 0.0240648,
-                7940 => 0.0144123,
-                33586 => 0.0108691,
-                387 => 0.0086826,
-                1781 => 0.0058571,
-                1629 => 0.005489,
-                3351 => 0.0051125,
+                2512 => 0.7492506,
+                1524 => 0.0989418,
+                656 => 0.032397,
+                636 => 0.0240763,
+                7940 => 0.0143969,
+                33586 => 0.01087,
+                387 => 0.0086808,
+                1781 => 0.0058532,
+                1629 => 0.0054883,
+                3351 => 0.0051112,
              },
              "token": 2512,
            },
            {
              "probabilities": Map {
-                922 => 0.9521815,
-                1606 => 0.015013,
-                11 => 0.014011,
-                430 => 0.0029686,
-                627 => 0.002315,
-                13 => 0.0018864,
-                1524 => 0.0018013,
+                922 => 0.9521582,
+                1606 => 0.0150241,
+                11 => 0.0140157,
+                430 => 0.002969,
+                627 => 0.0023168,
+                13 => 0.0018882,
+                1524 => 0.0018011,
                369 => 0.0017693,
-                323 => 0.0009247,
-                382 => 0.0008479,
+                323 => 0.0009252,
+                382 => 0.0008483,
              },
              "token": 922,
            },
            {
              "probabilities": Map {
-                279 => 0.6508148,
-                4205 => 0.3128796,
-                1148 => 0.0113661,
-                1690 => 0.004425,
-                904 => 0.0030377,
-                1202 => 0.0026803,
-                264 => 0.0011171,
-                1790 => 0.001086,
-                813 => 0.0010579,
-                1524 => 0.0007699,
+                279 => 0.6508825,
+                4205 => 0.3128083,
+                1148 => 0.0113708,
+                1690 => 0.0044266,
+                904 => 0.0030378,
+                1202 => 0.0026779,
+                264 => 0.001117,
+                1790 => 0.0010864,
+                813 => 0.0010572,
+                1524 => 0.0007698,
              },
              "token": 279,
            },
@@ -280,15 +280,15 @@ describe("llama 3.1", () => {
         expect(res).toMatchInlineSnapshot(`
          [
            {
-              "confidence": 0.4307292,
+              "confidence": 0.4307095,
              "token": 578,
            },
            {
-              "confidence": 0.4222992,
+              "confidence": 0.4223687,
              "token": 16053,
            },
            {
-              "confidence": 0.9981185,
+              "confidence": 0.9981177,
              "token": 5679,
            },
            {
@@ -296,27 +296,27 @@ describe("llama 3.1", () => {
              "token": 374,
            },
            {
-              "confidence": 0.2757553,
+              "confidence": 0.2758818,
              "token": 2288,
            },
            {
-              "confidence": 0.9065909,
+              "confidence": 0.9066046,
              "token": 16053,
            },
            {
-              "confidence": 0.988279,
+              "confidence": 0.9882814,
              "token": 311,
            },
            {
-              "confidence": 0.7492248,
+              "confidence": 0.7492506,
              "token": 2512,
            },
            {
-              "confidence": 0.9521815,
+              "confidence": 0.9521582,
              "token": 922,
            },
            {
-              "confidence": 0.6508148,
+              "confidence": 0.6508825,
              "token": 279,
            },
          ]
@@ -353,49 +353,49 @@ describe("llama 3.1", () => {
         expect(res).toMatchInlineSnapshot(`
          [
            {
-              "confidence": 0.4307292,
+              "confidence": 0.4307095,
              "probabilities": Map {
-                578 => 0.4307292,
-                1115 => 0.1304377,
-                1102 => 0.0516853,
-                763 => 0.042889,
-                1283 => 0.029397,
-                2100 => 0.0293787,
-                15636 => 0.0262684,
-                2030 => 0.021849,
-                320 => 0.016903,
-                1628 => 0.0118695,
+                578 => 0.4307095,
+                1115 => 0.1304636,
+                1102 => 0.0516819,
+                763 => 0.0428933,
+                1283 => 0.0293915,
+                2100 => 0.0293782,
+                15636 => 0.0262626,
+                2030 => 0.0218519,
+                320 => 0.0169018,
+                1628 => 0.0118644,
              },
              "token": 578,
            },
            {
-              "confidence": 0.4222992,
+              "confidence": 0.4223687,
              "probabilities": Map {
-                16053 => 0.4222992,
-                4062 => 0.3036339,
-                39935 => 0.0603973,
-                2944 => 0.0373043,
-                5679 => 0.0238118,
-                11914 => 0.0162981,
-                2144 => 0.0146835,
-                1121 => 0.0069849,
-                17571 => 0.0057944,
-                3446 => 0.0049346,
+                16053 => 0.4223687,
+                4062 => 0.303549,
+                39935 => 0.0603321,
+                2944 => 0.0373496,
+                5679 => 0.0237923,
+                11914 => 0.0163001,
+                2144 => 0.0146822,
+                1121 => 0.0069893,
+                17571 => 0.0057973,
+                3446 => 0.0049349,
              },
              "token": 16053,
            },
            {
-              "confidence": 0.9981185,
+              "confidence": 0.9981177,
              "probabilities": Map {
-                5679 => 0.9981185,
-                12875 => 0.0001592,
+                5679 => 0.9981177,
+                12875 => 0.0001593,
                18964 => 0.0001154,
-                39935 => 0.000115,
-                13 => 0.0001049,
+                39935 => 0.0001149,
+                13 => 0.000105,
                627 => 0.0000928,
-                656 => 0.0000625,
+                656 => 0.0000626,
                893 => 0.0000563,
-                198 => 0.0000522,
+                198 => 0.0000523,
                374 => 0.0000519,
              },
              "token": 5679,
@@ -404,59 +404,59 @@
              "confidence": 0.8126541,
              "probabilities": Map {
                374 => 0.8126541,
-                1587 => 0.0481526,
-                596 => 0.0247368,
-                1120 => 0.0223041,
-                3250 => 0.0215465,
-                706 => 0.0161833,
-                15849 => 0.0086943,
-                1053 => 0.0059125,
-                55064 => 0.0037811,
+                1587 => 0.0481505,
+                596 => 0.0247274,
+                1120 => 0.022311,
+                3250 => 0.0215521,
+                706 => 0.0161821,
+                15849 => 0.0086956,
+                1053 => 0.0059156,
+                55064 => 0.0037815,
                11 => 0.0036657,
              },
              "token": 374,
            },
            {
-              "confidence": 0.2757553,
+              "confidence": 0.2758818,
              "probabilities": Map {
-                2288 => 0.2757553,
-                1120 => 0.1666547,
-                539 => 0.1577473,
-                779 => 0.133445,
-                264 => 0.0558533,
-                1101 => 0.0292142,
-                16053 => 0.0176781,
-                5042 => 0.015864,
-                1193 => 0.014582,
-                2744 => 0.0140904,
+                2288 => 0.2758818,
+                1120 => 0.1666409,
+                539 => 0.1577165,
+                779 => 0.1333762,
+                264 => 0.0558459,
+                1101 => 0.029207,
+                16053 => 0.0176698,
+                5042 => 0.0158617,
+                1193 => 0.0145808,
+                2744 => 0.0140919,
              },
              "token": 2288,
            },
            {
-              "confidence": 0.9065909,
+              "confidence": 0.9066046,
              "probabilities": Map {
-                16053 => 0.9065909,
-                13326 => 0.0636439,
-                19781 => 0.007158,
-                17551 => 0.0020244,
-                10968 => 0.0012683,
-                11920 => 0.0011008,
-                6435 => 0.0010087,
-                34386 => 0.0007758,
-                1208 => 0.0006099,
-                25366 => 0.0005672,
+                16053 => 0.9066046,
+                13326 => 0.0636245,
+                19781 => 0.007155,
+                17551 => 0.0020255,
+                10968 => 0.0012684,
+                11920 => 0.001101,
+                6435 => 0.001009,
+                34386 => 0.0007755,
+                1208 => 0.00061,
+                25366 => 0.0005675,
              },
              "token": 16053,
            },
            {
-              "confidence": 0.988279,
+              "confidence": 0.9882814,
              "probabilities": Map {
-                311 => 0.988279,
-                1524 => 0.0061858,
-                11 => 0.0025774,
+                311 => 0.9882814,
+                1524 => 0.0061828,
+                11 => 0.0025772,
                323 => 0.0005243,
                13 => 0.0003535,
-                627 => 0.0003211,
+                627 => 0.0003212,
                1606 => 0.0002642,
                2288 => 0.0002583,
                369 => 0.0001247,
@@ -465,50 +465,50 @@
              "token": 311,
            },
            {
-              "confidence": 0.7492248,
+              "confidence": 0.7492506,
              "probabilities": Map {
-                2512 => 0.7492248,
-                1524 => 0.0989405,
-                656 => 0.032411,
-                636 => 0.0240648,
-                7940 => 0.0144123,
-                33586 => 0.0108691,
-                387 => 0.0086826,
-                1781 => 0.0058571,
-                1629 => 0.005489,
-                3351 => 0.0051125,
+                2512 => 0.7492506,
+                1524 => 0.0989418,
+                656 => 0.032397,
+                636 => 0.0240763,
+                7940 => 0.0143969,
+                33586 => 0.01087,
+                387 => 0.0086808,
+                1781 => 0.0058532,
+                1629 => 0.0054883,
+                3351 => 0.0051112,
              },
              "token": 2512,
            },
            {
-              "confidence": 0.9521815,
+              "confidence": 0.9521582,
              "probabilities": Map {
-                922 => 0.9521815,
-                1606 => 0.015013,
-                11 => 0.014011,
-                430 => 0.0029686,
-                627 => 0.002315,
-                13 => 0.0018864,
-                1524 => 0.0018013,
+                922 => 0.9521582,
+                1606 => 0.0150241,
+                11 => 0.0140157,
+                430 => 0.002969,
+                627 => 0.0023168,
+                13 => 0.0018882,
+                1524 => 0.0018011,
                369 => 0.0017693,
-                323 => 0.0009247,
-                382 => 0.0008479,
+                323 => 0.0009252,
+                382 => 0.0008483,
              },
              "token": 922,
            },
            {
-              "confidence": 0.6508148,
+              "confidence": 0.6508825,
              "probabilities": Map {
-                279 => 0.6508148,
-                4205 => 0.3128796,
-                1148 => 0.0113661,
-                1690 => 0.004425,
-                904 => 0.0030377,
-                1202 => 0.0026803,
-                264 => 0.0011171,
-                1790 => 0.001086,
-                813 => 0.0010579,
-                1524 => 0.0007699,
+                279 => 0.6508825,
+                4205 => 0.3128083,
+                1148 => 0.0113708,
+                1690 => 0.0044266,
+                904 => 0.0030378,
+                1202 => 0.0026779,
+                264 => 0.001117,
+                1790 => 0.0010864,
+                813 => 0.0010572,
+                1524 => 0.0007698,
              },
              "token": 279,
            },
diff --git a/test/modelDependent/llama3.2/promptCompletion.test.ts b/test/modelDependent/llama3.2/promptCompletion.test.ts
index 9362f25e..a0db42f2 100644
--- a/test/modelDependent/llama3.2/promptCompletion.test.ts
+++ b/test/modelDependent/llama3.2/promptCompletion.test.ts
@@ -43,7 +43,7 @@ describe("llama 3.2", () => {
         const promptCompletion = await chatSession.completePrompt("Hi there!", {
             maxTokens: 11
         });
-        expect(promptCompletion).toMatchInlineSnapshot(`" I'm looking for a new phone case. I need"`);
+        expect(promptCompletion).toMatchInlineSnapshot("\" I'm looking for a new phone case. I need\"");
         expect(LlamaText.fromTokens(model.tokenizer, chatSession.sequence.contextTokens)).toMatchInlineSnapshot(`
             LlamaText([
                 new SpecialToken("BOS"),
diff --git a/test/modelDependent/qwen3-0.6b/functions.test.ts b/test/modelDependent/qwen3-0.6b/functions.test.ts
index 07bd2899..46bd9f2f 100644
--- a/test/modelDependent/qwen3-0.6b/functions.test.ts
+++ b/test/modelDependent/qwen3-0.6b/functions.test.ts
@@ -198,7 +198,7 @@ describe("qwen3 0.6b", () => {

         const res2 = await chatSession.prompt([
             "The owner has 3 apps: App1, App2, and App3.",
-            "Notify the owner with a main notifications about 'apps time', with sub notifications for each app with the app's name.",
+            "Notify the owner with a main notifications about 'apps time', with a sub-notification for each app with the app's name.",
             "Under each app sub-notification add a sub-notification with the app's number."
         ].join("\n"), {
             ...promptOptions,
diff --git a/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts b/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts
index c2ad773f..39a722b0 100644
--- a/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts
+++ b/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts
@@ -111,7 +111,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3
             });
             expect(res.gpuLayers).to.eql(16);
-            expect(res.contextSize).to.toMatchInlineSnapshot("8064");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8061");
         }
         try {
             await resolveGpuLayers(16, {
@@ -142,7 +142,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(16);
-            expect(res.contextSize).to.toMatchInlineSnapshot("138");
+            expect(res.contextSize).to.toMatchInlineSnapshot("136");
         }


@@ -174,7 +174,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(32);
-            expect(res.contextSize).to.toMatchInlineSnapshot("11348");
+            expect(res.contextSize).to.toMatchInlineSnapshot("11347");
         }
         try {
             await resolveGpuLayers(32, {
@@ -192,7 +192,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(32);
-            expect(res.contextSize).to.toMatchInlineSnapshot("48");
+            expect(res.contextSize).to.toMatchInlineSnapshot("47");
         }

         {
@@ -223,7 +223,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("11348");
+            expect(res.contextSize).to.toMatchInlineSnapshot("11347");
         }
         try {
             await resolveGpuLayers(33, {
@@ -241,7 +241,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
            });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("48");
+            expect(res.contextSize).to.toMatchInlineSnapshot("47");
         }

         {
@@ -303,7 +303,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("48");
+            expect(res.contextSize).to.toMatchInlineSnapshot("47");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -311,7 +311,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -319,7 +319,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.4
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("6979");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6978");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -327,7 +327,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.8
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("8072");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8070");
         }
     });

@@ -346,7 +346,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 0.4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("1");
-            expect(res.contextSize).to.toMatchInlineSnapshot("10864");
+            expect(res.contextSize).to.toMatchInlineSnapshot("10841");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -362,7 +362,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 1.4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("5");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8368");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8361");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -370,7 +370,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 2.4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("1518");
+            expect(res.contextSize).to.toMatchInlineSnapshot("1517");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -378,7 +378,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.1
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("3429");
+            expect(res.contextSize).to.toMatchInlineSnapshot("3428");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -386,7 +386,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.3
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("3976");
+            expect(res.contextSize).to.toMatchInlineSnapshot("3974");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -394,7 +394,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4522");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4520");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -402,7 +402,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5341");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5340");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -410,7 +410,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -418,7 +418,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.3
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("6706");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6705");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -426,7 +426,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7252");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7251");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -434,7 +434,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8072");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8070");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -442,7 +442,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 5.2
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("9164");
+            expect(res.contextSize).to.toMatchInlineSnapshot("9163");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -450,7 +450,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 5.8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("10802");
+            expect(res.contextSize).to.toMatchInlineSnapshot("10801");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -458,7 +458,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 6
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("11348");
+            expect(res.contextSize).to.toMatchInlineSnapshot("11347");
         }
     });

@@ -504,7 +504,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.eql(16);
-            expect(res.contextSize).to.toMatchInlineSnapshot("13255");
+            expect(res.contextSize).to.toMatchInlineSnapshot("13252");
         }
         try {
             await resolveGpuLayers({min: 16}, {
@@ -522,7 +522,7 @@ describe("stableCode", () => {
             });
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
         }
         {
             const res = await resolveGpuLayers({min: 16, max: 24}, {
@@ -532,7 +532,7 @@ describe("stableCode", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("23");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8249");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8248");
         }
         {
             const res = await resolveGpuLayers({min: 16, max: 24}, {
@@ -542,7 +542,7 @@ describe("stableCode", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8064");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8061");
         }
     });

@@ -565,7 +565,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -575,7 +575,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 1
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("3");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5933");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5921");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -585,7 +585,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-            expect(res.contextSize).to.toMatchInlineSnapshot("9208");
+            expect(res.contextSize).to.toMatchInlineSnapshot("9206");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {