diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 419dc73f..32605a11 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -57,7 +57,7 @@ jobs:
       matrix:
         config:
           - name: "Windows for x64"
-            os: windows-2019
+            os: windows-2022
             artifact: "win-x64"
           - name: "Windows for Arm"
             os: windows-2022
diff --git a/.vitepress/utils/ensureLocalImage.ts b/.vitepress/utils/ensureLocalImage.ts
index 47894cd1..cbba76a9 100644
--- a/.vitepress/utils/ensureLocalImage.ts
+++ b/.vitepress/utils/ensureLocalImage.ts
@@ -55,7 +55,7 @@ export async function ensureLocalImage(url: string, name: string, {
     if (resolvedImages.has(cacheKey))
         return resolvedImages.get(cacheKey)!;

-    return await withLock(cacheKey[0], cacheKey[1], async () => {
+    return await withLock([resolvedImages, ...cacheKey], async () => {
         if (resolvedImages.has(cacheKey))
             return resolvedImages.get(cacheKey)!;

@@ -185,7 +185,9 @@ function getFileExtension(format: keyof FormatEnum | undefined) {
 async function fetchWithRetry(url: string, retires: number = 5, waitTime: number = 1000 * 2) {
     for (let i = retires; i >= 0; i--) {
         try {
-            return await fetch(url);
+            return await fetch(url, {
+                redirect: "follow"
+            });
         } catch (err) {
             if (i === 0) {
                 console.error(`Failed to fetch image: ${url}`, err);
diff --git a/docs/guide/embedding.md b/docs/guide/embedding.md
index 6ce591a5..cf697e09 100644
--- a/docs/guide/embedding.md
+++ b/docs/guide/embedding.md
@@ -172,7 +172,7 @@ const documents = [
     "Cleaning the house is a good way to keep it tidy"
 ];

-const query = "Tell me a goegraphical fact";
+const query = "Tell me a nature geographical fact";
 const rankedDocuments = await context.rankAndSort(query, documents);

 const topDocument = rankedDocuments[0]!;
@@ -185,7 +185,7 @@ console.log("Ranked documents:", rankedDocuments);
 ```
 > This example will produce this output:
 > ```
-> query: Tell me a goegraphical fact
+> query: Tell me a nature geographical fact
 > Top document: Mount Everest is the tallest mountain in the world
 > Second document: The capital of France is Paris
 > ```
diff --git a/llama/addon/addon.cpp b/llama/addon/addon.cpp
index eef81c25..a01a987e 100644
--- a/llama/addon/addon.cpp
+++ b/llama/addon/addon.cpp
@@ -196,6 +196,36 @@ Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }

+Napi::Value addonSetNuma(const Napi::CallbackInfo& info) {
+    const bool numaDisabled = info.Length() == 0
+        ? true
+        : info[0].IsBoolean()
+            ? !info[0].As<Napi::Boolean>().Value()
+            : false;
+
+    if (numaDisabled)
+        return info.Env().Undefined();
+
+    const auto numaType = info[0].IsString()
+        ? info[0].As<Napi::String>().Utf8Value()
+        : "";
+
+    if (numaType == "distribute") {
+        llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
+    } else if (numaType == "isolate") {
+        llama_numa_init(GGML_NUMA_STRATEGY_ISOLATE);
+    } else if (numaType == "numactl") {
+        llama_numa_init(GGML_NUMA_STRATEGY_NUMACTL);
+    } else if (numaType == "mirror") {
+        llama_numa_init(GGML_NUMA_STRATEGY_MIRROR);
+    } else {
+        Napi::Error::New(info.Env(), std::string("Invalid NUMA strategy \"") + numaType + "\"").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return info.Env().Undefined();
+}
+
 Napi::Value addonInit(const Napi::CallbackInfo& info) {
     if (backendInitialized) {
         Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
@@ -255,6 +285,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
         Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
         Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
         Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
+        Napi::PropertyDescriptor::Function("setNuma", addonSetNuma),
         Napi::PropertyDescriptor::Function("init", addonInit),
         Napi::PropertyDescriptor::Function("dispose", addonDispose),
     });
diff --git a/package-lock.json b/package-lock.json
index 0e327ffe..719e8ef6 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,7 +10,7 @@
         "hasInstallScript": true,
         "license": "MIT",
         "dependencies": {
-            "@huggingface/jinja": "^0.5.0",
+            "@huggingface/jinja": "^0.5.1",
             "async-retry": "^1.3.3",
             "bytes": "^3.1.2",
             "chalk": "^5.4.1",
@@ -24,7 +24,7 @@
             "ignore": "^7.0.4",
             "ipull": "^3.9.2",
             "is-unicode-supported": "^2.1.0",
-            "lifecycle-utils": "^2.0.1",
+            "lifecycle-utils": "^3.0.1",
             "log-symbols": "^7.0.0",
             "nanoid": "^5.1.5",
             "node-addon-api": "^8.3.1",
@@ -70,7 +70,7 @@
             "@types/yargs": "^17.0.33",
             "@vitest/coverage-v8": "^3.1.3",
             "@vitest/ui": "^3.1.3",
-            "electron": "^36.2.0",
+            "electron": "^37.2.4",
             "eslint": "^9.26.0",
             "eslint-import-resolver-typescript": "^4.3.4",
             "eslint-plugin-import": "^2.31.0",
@@ -91,7 +91,7 @@
             "typescript-eslint": "^8.32.0",
             "vite-node": "^3.1.3",
             "vitepress": "^1.6.3",
-            "vitepress-plugin-llms": "https://pkg.pr.new/vitepress-plugin-llms@51",
+            "vitepress-plugin-llms": "^1.7.2",
             "vitest": "^3.1.3",
             "zx": "^8.5.4"
         },
@@ -1613,9 +1613,9 @@
         }
     },
     "node_modules/@huggingface/jinja": {
-        "version": "0.5.0",
-        "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.0.tgz",
-        "integrity": "sha512-Ptc03/jGRiYRoi0bUYKZ14MkDslsBRT24oxmsvUlfYrvQMldrxCevhPnT+hfX8awKTT8/f/0ZBBWldoeAcMHdQ==",
+        "version": "0.5.1",
+        "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.1.tgz",
+        "integrity": "sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==",
         "license": "MIT",
         "engines": {
             "node": ">=18"
@@ -2111,6 +2111,29 @@
             "url": "https://opencollective.com/libvips"
         }
     },
+    "node_modules/@isaacs/balanced-match": {
+        "version": "4.0.1",
+        "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
+        "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
+        "dev": true,
+        "license": "MIT",
+        "engines": {
+            "node": "20 || >=22"
+        }
+    },
+    "node_modules/@isaacs/brace-expansion": {
+        "version": "5.0.0",
+        "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
+        "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
+        "dev": true,
"license": "MIT", + "dependencies": { + "@isaacs/balanced-match": "^4.0.1" + }, + "engines": { + "node": "20 || >=22" + } + }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -7501,9 +7524,9 @@ "license": "MIT" }, "node_modules/electron": { - "version": "36.2.0", - "resolved": "https://registry.npmjs.org/electron/-/electron-36.2.0.tgz", - "integrity": "sha512-5yldoRjBKxPQfI0QMX+qq750o3Nl8N1SZnJqOPMq0gZ6rIJ+7y4ZLp808GrFwjfTm05TYgq3GSD8FGuKQZqwEw==", + "version": "37.2.4", + "resolved": "https://registry.npmjs.org/electron/-/electron-37.2.4.tgz", + "integrity": "sha512-F1WDDvY60TpFwGyW+evNB5q0Em8PamcDTVIKB2NaiaKEbNC2Fabn8Wyxy5g+Anirr1K40eKGjfSJhWEUbI1TOw==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -11548,9 +11571,9 @@ } }, "node_modules/lifecycle-utils": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/lifecycle-utils/-/lifecycle-utils-2.0.1.tgz", - "integrity": "sha512-jVso5WXIHfDL7Lf9sCRbLbPwgpoha5qUPgi+RMNVIMuOcb0nJ9Qr0r1OXbqLaxzBUQBhN8jYy92RLSk2OGJ6Cg==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/lifecycle-utils/-/lifecycle-utils-3.0.1.tgz", + "integrity": "sha512-Qt/Jl5dsNIsyCAZsHB6x3mbwHFn0HJbdmvF49sVX/bHgX2cW7+G+U+I67Zw+TPM1Sr21Gb2nfJMd2g6iUcI1EQ==", "license": "MIT" }, "node_modules/lines-and-columns": { @@ -19465,9 +19488,9 @@ } }, "node_modules/tokenx": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/tokenx/-/tokenx-1.0.1.tgz", - "integrity": "sha512-MhOngUHRuVE0CHP4cNEZ/XpdXETFL65nJpEvoTW+VYPuXsT/MTeNj+UNnekNsnxecmj2DEvUYPebqz+CsPTUSg==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/tokenx/-/tokenx-1.1.0.tgz", + "integrity": "sha512-KCjtiC2niPwTSuz4ktM82Ki5bjqBwYpssiHDsGr5BpejN/B3ksacRvrsdoxljdMIh2nCX78alnDkeemBmYUmTA==", "dev": true, "license": "MIT" }, @@ -20349,21 +20372,23 @@ } }, "node_modules/vitepress-plugin-llms": { - "version": "1.3.4", - "resolved": "https://pkg.pr.new/vitepress-plugin-llms@51", - "integrity": "sha512-FTyNYyx1jVbKae/raJLgDTgMaHSmY51B1nbokeC4KAhXMe413eGSexNIdvnCHXf9U1t92VlLajJ5S9E7adDoOQ==", + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/vitepress-plugin-llms/-/vitepress-plugin-llms-1.7.2.tgz", + "integrity": "sha512-4UxB3PXfRAfzbcKRXizRQajstjmYn1hoFOSCGIQBYyu3qYs9/TEAUe6oLGbiwaDD+wPQ/T1ow59pt2LAMR4/1A==", "dev": true, "license": "MIT", "dependencies": { "byte-size": "^9.0.1", "gray-matter": "^4.0.3", + "markdown-it": "^14.1.0", "markdown-title": "^1.0.2", "millify": "^6.1.0", - "minimatch": "^10.0.1", + "minimatch": "^10.0.3", + "path-to-regexp": "^8.2.0", "picocolors": "^1.1.1", "remark": "^15.0.1", "remark-frontmatter": "^5.0.0", - "tokenx": "^1.0.0", + "tokenx": "^1.1.0", "unist-util-remove": "^4.0.0", "unist-util-visit": "^5.0.0" }, @@ -20372,13 +20397,13 @@ } }, "node_modules/vitepress-plugin-llms/node_modules/minimatch": { - "version": "10.0.1", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.0.1.tgz", - "integrity": "sha512-ethXTt3SGGR+95gudmqJ1eNhRO7eGEGIgYA9vnPatK4/etz2MEVDno5GMCibdMTuBMyElzIlgxMna3K94XDIDQ==", + "version": "10.0.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.0.3.tgz", + "integrity": "sha512-IPZ167aShDZZUMdRk66cyQAW3qr0WzbHkPdMYa8bzZhlHhO3jALbKdxcaak7W9FfT2rZNpQuUu4Od7ILEpXSaw==", "dev": true, "license": "ISC", "dependencies": { - "brace-expansion": "^2.0.1" + "@isaacs/brace-expansion": "^5.0.0" }, "engines": { "node": "20 || >=22" diff --git a/package.json b/package.json index b63a6e46..bbaeabc5 
100644
--- a/package.json
+++ b/package.json
@@ -157,7 +157,7 @@
         "@types/yargs": "^17.0.33",
         "@vitest/coverage-v8": "^3.1.3",
         "@vitest/ui": "^3.1.3",
-        "electron": "^36.2.0",
+        "electron": "^37.2.4",
         "eslint": "^9.26.0",
         "eslint-import-resolver-typescript": "^4.3.4",
         "eslint-plugin-import": "^2.31.0",
@@ -178,12 +178,12 @@
         "typescript-eslint": "^8.32.0",
         "vite-node": "^3.1.3",
         "vitepress": "^1.6.3",
-        "vitepress-plugin-llms": "https://pkg.pr.new/vitepress-plugin-llms@51",
+        "vitepress-plugin-llms": "^1.7.2",
         "vitest": "^3.1.3",
         "zx": "^8.5.4"
     },
     "dependencies": {
-        "@huggingface/jinja": "^0.5.0",
+        "@huggingface/jinja": "^0.5.1",
         "async-retry": "^1.3.3",
         "bytes": "^3.1.2",
         "chalk": "^5.4.1",
@@ -197,7 +197,7 @@
         "ignore": "^7.0.4",
         "ipull": "^3.9.2",
         "is-unicode-supported": "^2.1.0",
-        "lifecycle-utils": "^2.0.1",
+        "lifecycle-utils": "^3.0.1",
         "log-symbols": "^7.0.0",
         "nanoid": "^5.1.5",
         "node-addon-api": "^8.3.1",
diff --git a/src/bindings/AddonTypes.ts b/src/bindings/AddonTypes.ts
index a2f06ae9..a1cbefc3 100644
--- a/src/bindings/AddonTypes.ts
+++ b/src/bindings/AddonTypes.ts
@@ -1,4 +1,5 @@
 import {Token} from "../types.js";
+import {LlamaNuma} from "./types.js";


 export type BindingModule = {
@@ -85,6 +86,7 @@ export type BindingModule = {
         total: number
     },
     init(): Promise<void>,
+    setNuma(numa?: LlamaNuma): void,
     loadBackends(forceLoadLibrariesSearchPath?: string): void,
     dispose(): Promise<void>
 };
@@ -159,7 +161,7 @@
 };

 export type BatchLogitIndex = number & {
-    __batchLogitIndex: never
+    readonly __batchLogitIndex: never
 };

 export type AddonGrammar = {
@@ -167,7 +169,7 @@
 };

 export type AddonGrammarEvaluationState = "AddonGrammarEvaluationState" & {
-    __brand: never
+    readonly __brand: never
 };

 export type AddonSampler = {
diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts
index 005e4a7a..243ad4ff 100644
--- a/src/bindings/Llama.ts
+++ b/src/bindings/Llama.ts
@@ -11,7 +11,7 @@ import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js";
 import {ThreadsSplitter} from "../utils/ThreadsSplitter.js";
 import {getLlamaClasses, LlamaClasses} from "../utils/getLlamaClasses.js";
 import {BindingModule} from "./AddonTypes.js";
-import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual} from "./types.js";
+import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual, LlamaNuma} from "./types.js";
 import {MemoryOrchestrator, MemoryReservation} from "./utils/MemoryOrchestrator.js";

 export const LlamaLogLevelToAddonLogLevel: ReadonlyMap<LlamaLogLevel, number> = new Map([
@@ -67,8 +67,8 @@ export class Llama {
     public readonly onDispose = new EventRelay<void>();

     private constructor({
-        bindings, bindingPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, buildGpu, maxThreads, vramOrchestrator,
-        vramPadding, ramOrchestrator, ramPadding, swapOrchestrator
+        bindings, bindingPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, numa, buildGpu, maxThreads,
+        vramOrchestrator, vramPadding, ramOrchestrator, ramPadding, swapOrchestrator
     }: {
         bindings: BindingModule,
         bindingPath: string,
@@ -81,6 +81,7 @@
             release: string
         },
         debug: boolean,
+        numa?: LlamaNuma,
         buildGpu: BuildGpu,
         maxThreads?: number,
         vramOrchestrator: MemoryOrchestrator,
@@ -110,6 +111,9 @@
         bindings.ensureGpuDeviceIsSupported();

+        if (numa != null && numa !== false)
+            bindings.setNuma(numa);
+
         this._gpu = bindings.getGpuType() ?? false;
         this._supportsGpuOffloading = bindings.getSupportsGpuOffloading();
         this._supportsMmap = bindings.getSupportsMmap();
@@ -328,7 +332,7 @@ export class Llama {
     public async loadModel(options: LlamaModelOptions) {
         this._ensureNotDisposed();

-        return await withLock(this._memoryLock, LlamaLocks.loadToMemory, options.loadSignal, async () => {
+        return await withLock([this._memoryLock, LlamaLocks.loadToMemory], options.loadSignal, async () => {
             this._ensureNotDisposed();

             const preventDisposalHandle = this._backendDisposeGuard.createPreventDisposalHandle();
@@ -468,7 +472,7 @@
     /** @internal */
     public static async _create({
         bindings, bindingPath, buildType, buildMetadata, logLevel, logger, vramPadding, ramPadding, maxThreads, skipLlamaInit = false,
-        debug
+        debug, numa
     }: {
         bindings: BindingModule,
         bindingPath: string,
@@ -480,7 +484,8 @@
         vramPadding: number | ((totalVram: number) => number),
         ramPadding: number | ((totalRam: number) => number),
         skipLlamaInit?: boolean,
-        debug: boolean
+        debug: boolean,
+        numa?: LlamaNuma
     }) {
         const vramOrchestrator = new MemoryOrchestrator(() => {
             const {total, used, unifiedSize} = bindings.getGpuVramInfo();
@@ -537,6 +542,7 @@
             logLevel,
             logger,
             debug,
+            numa,
             buildGpu: buildMetadata.buildOptions.gpu,
             vramOrchestrator,
             maxThreads,
@@ -643,6 +649,12 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLogLevel {
         return LlamaLogLevel.log;
     else if (level === LlamaLogLevel.warn && message.startsWith("make_cpu_buft_list: disabling extra buffer types"))
         return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_context: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_kv_cache_unified: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("init: embeddings required but some input tokens were not marked as outputs -> overriding"))
+        return LlamaLogLevel.info;

     return level;
 }
diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts
index 8ba71a22..3d6b85a9 100644
--- a/src/bindings/getLlama.ts
+++ b/src/bindings/getLlama.ts
@@ -16,7 +16,7 @@ import {
 } from "./utils/compileLLamaCpp.js";
 import {getLastBuildInfo} from "./utils/lastBuildInfo.js";
 import {getClonedLlamaCppRepoReleaseInfo, isLlamaCppRepoCloned} from "./utils/cloneLlamaCppRepo.js";
-import {BuildGpu, BuildMetadataFile, BuildOptions, LlamaGpuType, LlamaLogLevel} from "./types.js";
+import {BuildGpu, BuildMetadataFile, BuildOptions, LlamaGpuType, LlamaLogLevel, LlamaNuma} from "./types.js";
 import {BinaryPlatform, getPlatform} from "./utils/getPlatform.js";
 import {getBuildFolderNameForBuildOptions} from "./utils/getBuildFolderNameForBuildOptions.js";
 import {resolveCustomCmakeOptions} from "./utils/resolveCustomCmakeOptions.js";
@@ -171,7 +171,27 @@ export type LlamaOptions = {
      *
      * Defaults to `false`.
      */
-    dryRun?: boolean
+    dryRun?: boolean,
+
+    /**
+     * NUMA (Non-Uniform Memory Access) allocation policy.
+     *
+     * On multi-socket or multi-cluster machines, each CPU "socket" (or node) has its own local memory.
+     * Accessing memory on your own socket is fast, but accessing memory on another socket is slower.
+     * Setting a NUMA allocation policy can dramatically improve performance
+     * by keeping data local and "close" to the socket.
+     *
+     * These are the available NUMA options:
+     * - **`false`**: Don't set any NUMA policy - let the OS decide.
+     * - **`"distribute"`**: Distribute the memory across all available NUMA nodes.
+     * - **`"isolate"`**: Pin both threads and their memory to a single NUMA node to avoid cross-node traffic.
+     * - **`"numactl"`**: Delegate NUMA management to the external `numactl` command (or `libnuma` library) to set the NUMA policy.
+     * - **`"mirror"`**: Allocate memory on all NUMA nodes, and copy the data to all of them.
+     *   This ensures minimal traffic between nodes, but uses more memory.
+     *
+     * Defaults to `false` (no NUMA policy).
+     */
+    numa?: LlamaNuma
 };

 export type LastBuildOptions = {
@@ -261,7 +281,27 @@
      *
      * Defaults to `false`.
      */
-    dryRun?: boolean
+    dryRun?: boolean,
+
+    /**
+     * NUMA (Non-Uniform Memory Access) allocation policy.
+     *
+     * On multi-socket or multi-cluster machines, each CPU "socket" (or node) has its own local memory.
+     * Accessing memory on your own socket is fast, but accessing memory on another socket is slower.
+     * Setting a NUMA allocation policy can dramatically improve performance
+     * by keeping data local and "close" to the socket.
+     *
+     * These are the available NUMA options:
+     * - **`false`**: Don't set any NUMA policy - let the OS decide.
+     * - **`"distribute"`**: Distribute the memory across all available NUMA nodes.
+     * - **`"isolate"`**: Pin both threads and their memory to a single NUMA node to avoid cross-node traffic.
+     * - **`"numactl"`**: Delegate NUMA management to the external `numactl` command (or `libnuma` library) to set the NUMA policy.
+     * - **`"mirror"`**: Allocate memory on all NUMA nodes, and copy the data to all of them.
+     *   This ensures minimal traffic between nodes, but uses more memory.
+     *
+     * Defaults to `false` (no NUMA policy).
+     */
+    numa?: LlamaNuma
 };

 export const getLlamaFunctionName = "getLlama";
@@ -319,6 +359,7 @@ export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOptions?: LastBuildOptions) {
         vramPadding: lastBuildOptions?.vramPadding ?? defaultLlamaVramPadding,
         ramPadding: lastBuildOptions?.ramPadding ?? defaultLlamaRamPadding,
         debug: lastBuildOptions?.debug ?? defaultLlamaCppDebugMode,
+        numa: lastBuildOptions?.numa,
         dryRun
     };
@@ -346,6 +387,7 @@
             vramPadding: lastBuildOptions?.vramPadding ?? defaultLlamaVramPadding,
             ramPadding: lastBuildOptions?.ramPadding ?? defaultLlamaRamPadding,
             debug: lastBuildOptions?.debug ?? defaultLlamaCppDebugMode,
+            numa: lastBuildOptions?.numa,
             skipLlamaInit: dryRun
         });
@@ -380,6 +422,7 @@ export async function getLlamaForOptions({
     vramPadding = defaultLlamaVramPadding,
     ramPadding = defaultLlamaRamPadding,
     debug = defaultLlamaCppDebugMode,
+    numa = false,
     dryRun = false
 }: LlamaOptions, {
     updateLastBuildInfoOnCompile = false,
@@ -453,6 +496,7 @@
                 vramPadding,
                 ramPadding,
                 debug,
+                numa,
                 dryRun
             });
         } catch (err) {
@@ -470,6 +514,7 @@
                 vramPadding,
                 ramPadding,
                 debug,
+                numa,
                 dryRun
             });
         }
@@ -516,6 +561,7 @@
                 : null
             ),
             debug,
+            numa,
             pipeBinaryTestErrorLogs
         });
@@ -577,7 +623,8 @@
             vramPadding,
             ramPadding,
             skipLlamaInit,
-            debug
+            debug,
+            numa
         });
     } catch (err) {
         console.error(
@@ -622,6 +669,7 @@ async function loadExistingLlamaBinary({
     ramPadding,
     fallbackMessage,
     debug,
+    numa,
     pipeBinaryTestErrorLogs
 }: {
     buildOptions: BuildOptions,
@@ -638,6 +686,7 @@
     ramPadding: Required<LlamaOptions>["ramPadding"],
     fallbackMessage: string | null,
     debug: boolean,
+    numa?: LlamaNuma,
     pipeBinaryTestErrorLogs: boolean
 }) {
     const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
@@ -674,7 +723,8 @@
                 vramPadding,
                 ramPadding,
                 skipLlamaInit,
-                debug
+                debug,
+                numa
             });
         } else if (progressLogs) {
             console.warn(
@@ -733,7 +783,8 @@
                 vramPadding,
                 ramPadding,
                 skipLlamaInit,
-                debug
+                debug,
+                numa
             });
         } else if (progressLogs) {
             const binaryDescription = describeBinary({
@@ -788,7 +839,8 @@ async function buildAndLoadLlamaBinary({
     vramPadding,
     ramPadding,
     skipLlamaInit,
-    debug
+    debug,
+    numa
 }: {
     buildOptions: BuildOptions,
     skipDownload: boolean,
@@ -799,7 +851,8 @@
     vramPadding: Required<LlamaOptions>["vramPadding"],
     ramPadding: Required<LlamaOptions>["ramPadding"],
     skipLlamaInit: boolean,
-    debug: boolean
+    debug: boolean,
+    numa?: LlamaNuma
 }) {
     const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
@@ -833,7 +886,8 @@
         vramPadding,
         ramPadding,
         skipLlamaInit,
-        debug
+        debug,
+        numa
     });
 }
diff --git a/src/bindings/types.ts b/src/bindings/types.ts
index 4adfd86f..7748772c 100644
--- a/src/bindings/types.ts
+++ b/src/bindings/types.ts
@@ -22,6 +22,7 @@ export type BuildOptions = {
         release: string
     }
 };
+export type LlamaNuma = false | "distribute" | "isolate" | "numactl" | "mirror";

 export type BuildOptionsJSON = Omit<BuildOptions, "customCmakeOptions"> & {
     customCmakeOptions: Record<string, string>
diff --git a/src/bindings/utils/getLlamaWithoutBackend.ts b/src/bindings/utils/getLlamaWithoutBackend.ts
index 6a64d59f..992cdf6a 100644
--- a/src/bindings/utils/getLlamaWithoutBackend.ts
+++ b/src/bindings/utils/getLlamaWithoutBackend.ts
@@ -12,7 +12,7 @@ export async function getLlamaWithoutBackend() {
     if (sharedLlamaWithoutBackend != null)
        return sharedLlamaWithoutBackend;

-    return await withLock(getLlamaWithoutBackend, "loadAddon", async () => {
+    return await withLock([getLlamaWithoutBackend, "loadAddon"], async () => {
        if (sharedLlamaWithoutBackend != null)
            return sharedLlamaWithoutBackend;
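Throughout this diff, `withLock`, `acquireLock`, `isLockActive`, and `waitForLockRelease` calls migrate from separate `(scope, key)` arguments to a single `[scope, key]` array, matching the `lifecycle-utils` v2 → v3 bump in `package.json`. A minimal sketch of the new call shape, inferred from the call sites in this diff (the exact v3 signatures are an assumption based on these usages, not on the library's docs):

```ts
import {withLock, acquireLock} from "lifecycle-utils";

const scope = {}; // any object identity can anchor a lock scope

// lifecycle-utils v2: withLock(scope, "key", callback)
// lifecycle-utils v3: the scope and key move into a single array
const result = await withLock([scope, "key"], async () => {
    // only one callback at a time runs for the same [scope, "key"] pair
    return 42;
});
console.log(result);

// an optional AbortSignal can be passed between the array and the callback,
// as seen in the Llama.loadModel and LlamaChat call sites in this diff:
// await withLock([scope, "key"], abortSignal, async () => { /* ... */ });

// acquireLock mirrors the same shape when the lock must outlive a callback
const lock = await acquireLock([scope, "key"]);
try {
    // ...critical section...
} finally {
    lock.dispose(); // matches the evaluatorLock.dispose() usage below
}
```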
diff --git a/src/cli/commands/inspect/commands/InspectGpuCommand.ts b/src/cli/commands/inspect/commands/InspectGpuCommand.ts
index 59502963..51caa14c 100644
--- a/src/cli/commands/inspect/commands/InspectGpuCommand.ts
+++ b/src/cli/commands/inspect/commands/InspectGpuCommand.ts
@@ -8,12 +8,14 @@
 import {BuildGpu, LlamaLogLevel} from "../../../../bindings/types.js";
 import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
 import {getModuleVersion} from "../../../../utils/getModuleVersion.js";
 import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
-import {documentationPageUrls} from "../../../../config.js";
+import {builtinLlamaCppGitHubRepo, documentationPageUrls} from "../../../../config.js";
 import {Llama} from "../../../../bindings/Llama.js";
 import {getPlatformInfo} from "../../../../bindings/utils/getPlatformInfo.js";
 import {getLinuxDistroInfo} from "../../../../bindings/utils/getLinuxDistroInfo.js";
 import {isRunningUnderRosetta} from "../../../utils/isRunningUnderRosetta.js";
 import {toBytes} from "../../../utils/toBytes.js";
+import {getBinariesGithubRelease} from "../../../../bindings/utils/binariesGithubRelease.js";
+import {getClonedLlamaCppRepoReleaseInfo} from "../../../../bindings/utils/cloneLlamaCppRepo.js";

 type InspectGpuCommand = {
     // no options for now
@@ -74,8 +76,33 @@ export const InspectGpuCommand: CommandModule<object, InspectGpuCommand> = {
         try {
             const moduleVersion = await getModuleVersion();

-            if (moduleVersion != null)
+            if (moduleVersion != null) {
+                console.info();
                 console.info(`${chalk.yellow("node-llama-cpp:")} ${moduleVersion}`);
+            }
+        } catch (err) {
+            // do nothing
+        }
+
+        try {
+            const prebuiltBinariesRelease = await getBinariesGithubRelease();
+
+            console.info(`${chalk.yellow("Prebuilt binaries:")} ${prebuiltBinariesRelease}`);
+        } catch (err) {
+            // do nothing
+        }
+
+        try {
+            const clonedLlamaCppRelease = await getClonedLlamaCppRepoReleaseInfo();
+
+            if (clonedLlamaCppRelease != null)
+                console.info(
+                    `${chalk.yellow("Cloned source:")} ${clonedLlamaCppRelease.tag}` + (
+                        clonedLlamaCppRelease.llamaCppGithubRepo !== builtinLlamaCppGitHubRepo
+                            ? ` (${clonedLlamaCppRelease.llamaCppGithubRepo})`
+                            : ""
+                    )
+                );
         } catch (err) {
             // do nothing
         }
diff --git a/src/evaluator/LlamaChat/LlamaChat.ts b/src/evaluator/LlamaChat/LlamaChat.ts
index 628e220c..ddbfbcec 100644
--- a/src/evaluator/LlamaChat/LlamaChat.ts
+++ b/src/evaluator/LlamaChat/LlamaChat.ts
@@ -603,7 +603,7 @@ export class LlamaChat {
         if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
             throw new Error("Using both grammar and functions is not supported yet");

-        return await withLock(this._chatLock, "evaluate", signal, async (): Promise<LlamaChatResponse<Functions>> => {
+        return await withLock([this._chatLock, "evaluate"], signal, async (): Promise<LlamaChatResponse<Functions>> => {
             try {
                 generateResponseState.ensureLastHistoryItemIsModel();
                 generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
@@ -801,7 +801,7 @@
             }
         );

-        return await withLock(this._chatLock, "evaluate", signal, async (): Promise<LlamaChatLoadAndCompleteUserResponse> => {
+        return await withLock([this._chatLock, "evaluate"], signal, async (): Promise<LlamaChatLoadAndCompleteUserResponse> => {
             try {
                 generateResponseState.ensureLastHistoryItemIsUser();
diff --git a/src/evaluator/LlamaChatSession/LlamaChatSession.ts b/src/evaluator/LlamaChatSession/LlamaChatSession.ts
index f0a0ba77..183b6729 100644
--- a/src/evaluator/LlamaChatSession/LlamaChatSession.ts
+++ b/src/evaluator/LlamaChatSession/LlamaChatSession.ts
@@ -525,7 +525,7 @@ export class LlamaChatSession {
             throw new Error("The LlamaGrammar passed to this function was created with a different Llama instance than the one used by this sequence's model. 
Make sure you use the same Llama instance for both the model and the grammar."); this._stopAllPreloadAndPromptCompletions(); - return await withLock(this._chatLock, "evaluation", signal, async () => { + return await withLock([this._chatLock, "evaluation"], signal, async () => { this._ensureNotDisposed(); this._stopAllPreloadAndPromptCompletions(); @@ -856,7 +856,7 @@ export class LlamaChatSession { this._preloadAndCompleteAbortControllers.add(abortController); try { - return await withLock(this._chatLock, "evaluation", abortController.signal, async () => { + return await withLock([this._chatLock, "evaluation"], abortController.signal, async () => { this._ensureNotDisposed(); if (this._chat == null) diff --git a/src/evaluator/LlamaCompletion.ts b/src/evaluator/LlamaCompletion.ts index 37446682..3b500472 100644 --- a/src/evaluator/LlamaCompletion.ts +++ b/src/evaluator/LlamaCompletion.ts @@ -302,7 +302,7 @@ export class LlamaCompletion { throw new DisposedError(); }; - return await withLock(this, "generateCompletion", signal, async () => { + return await withLock([this as LlamaCompletion, "generateCompletion"], signal, async () => { ensureNotAborted(); if (this._sequence == null || this.disposed) @@ -503,7 +503,7 @@ export class LlamaCompletion { throw new DisposedError(); }; - return await withLock(this, "generateCompletion", signal, async () => { + return await withLock([this as LlamaCompletion, "generateCompletion"], signal, async () => { ensureNotAborted(); if (this._sequence == null || this.disposed) diff --git a/src/evaluator/LlamaContext/LlamaContext.ts b/src/evaluator/LlamaContext/LlamaContext.ts index 8a5cff98..974f7865 100644 --- a/src/evaluator/LlamaContext/LlamaContext.ts +++ b/src/evaluator/LlamaContext/LlamaContext.ts @@ -315,7 +315,7 @@ export class LlamaContext { this._batchDispatchPending = true; - void withLock(this, "context", async () => { + void withLock([this as LlamaContext, "context"], async () => { this._currentDispatchBatchHandle = {}; this._dispatchDecodeScheduled = false; this._batchDispatchPending = false; @@ -589,7 +589,7 @@ export class LlamaContext { let decodeLock: Lock | undefined; // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel if (this._llama.gpu === "vulkan") - decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode"); + decodeLock = await acquireLock([decodeSyncWorkaround.vulkanLock, "decode"]); try { await decodeTokenBatchItems(currentBatchItems, currentBatchSize); @@ -653,7 +653,7 @@ export class LlamaContext { if (this._disposed) return; - void withLock(this, "context", async () => { + void withLock([this as LlamaContext, "context"], async () => { if (this._disposed) return; @@ -1215,7 +1215,7 @@ export class LlamaContextSequence { let awaitPromise: Promise | undefined; - await withLock(this._context, "context", async () => { + await withLock([this._context, "context"], async () => { this._ensureNotDisposed(); if (ranges.length === 0) @@ -1555,7 +1555,7 @@ export class LlamaContextSequence { return item; }); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); try { return await this._decodeTokens( resolvedTokens, @@ -1588,7 +1588,7 @@ export class LlamaContextSequence { tokenBias: sampleOptions.tokenBias }); - return await withLock(sampler, "sample", async () => { + return await withLock([sampler, "sample"], async () => { if (sampler.disposed) return undefined; @@ -1623,7 +1623,7 @@ 
export class LlamaContextSequence { ); } finally { evaluatorLock.dispose(); - void withLock(sampler, "sample", sampler.asyncDispose); + void withLock([sampler, "sample"], sampler.asyncDispose); } } @@ -1638,8 +1638,8 @@ export class LlamaContextSequence { const resolvedPath = path.resolve(process.cwd(), filePath); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); - const contextLock = await acquireLock(this._context, "context"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); + const contextLock = await acquireLock([this._context, "context"]); try { this._ensureNotDisposed(); @@ -1681,8 +1681,8 @@ export class LlamaContextSequence { const resolvedPath = path.resolve(process.cwd(), filePath); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); - const contextLock = await acquireLock(this._context, "context"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); + const contextLock = await acquireLock([this._context, "context"]); try { this._ensureNotDisposed(); @@ -1757,7 +1757,7 @@ export class LlamaContextSequence { this._ensureNotDisposed(); const evaluatorLock = _skipLock ? undefined - : await acquireLock(this._lock, "evaluate"); + : await acquireLock([this._lock, "evaluate"]); let nextToken: Token | -1 | null | undefined; const yieldRes: Partial> = {}; @@ -1789,7 +1789,7 @@ export class LlamaContextSequence { tokenBias }); - return withLock(sampler, "sample", async () => { + return withLock([sampler, "sample"], async () => { if (sampler.disposed) return null; @@ -1847,7 +1847,7 @@ export class LlamaContextSequence { evalTokens = [nextToken]; } } finally { - void withLock(sampler, "sample", sampler.asyncDispose); + void withLock([sampler, "sample"], sampler.asyncDispose); } } @@ -1895,7 +1895,7 @@ export class LlamaContextSequence { try { while (true) { this._ensureNotDisposed(); - const evaluatorLock = await acquireLock(this._lock, "evaluate"); + const evaluatorLock = await acquireLock([this._lock, "evaluate"]); let nextToken: Token | undefined; const yieldRes: Partial> = {}; @@ -2019,7 +2019,7 @@ export class LlamaContextSequence { tokenBias }); - return withLock(sampler, "sample", async () => { + return withLock([sampler, "sample"], async () => { if (sampler.disposed) return null; @@ -2112,7 +2112,7 @@ export class LlamaContextSequence { logitsArray[logitsStartIndex] = true; } } finally { - void withLock(sampler, "sample", sampler.asyncDispose); + void withLock([sampler, "sample"], sampler.asyncDispose); if (this._tokenPredictorOwner === tokenPredictorOwner) tokenPredictor.stop(); diff --git a/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts b/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts index d20e3522..18ff71f7 100644 --- a/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts +++ b/src/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.ts @@ -119,7 +119,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { targetSequence.context._ctx.ensureDraftContextIsCompatibleForSpeculative(this._draftSequence.context._ctx); try { - await withLock(this, "evaluate", currentAbortSignal, async () => { + await withLock([this as DraftSequenceTokenPredictor, "evaluate"], currentAbortSignal, async () => { this._stateTokens = stateTokens.slice(); this._pendingEvalTokens = []; this._predictedTokens = []; @@ -157,7 +157,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { const grammarEvaluationStateOption = 
this._evaluateOptions.grammarEvaluationState instanceof Function ? this._evaluateOptions.grammarEvaluationState()?.clone() : this._evaluateOptions.grammarEvaluationState?.clone(); - void withLock(this, "pushTokens", async () => { + void withLock([this as DraftSequenceTokenPredictor, "pushTokens"], async () => { this._grammarEvaluationStateOption = grammarEvaluationStateOption; const tokensToPush = tokens.slice(); @@ -226,7 +226,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { if (untilPredictionsExhausted) this._waitForPredictionExhaustion = true; - void withLock(this, "evaluate", async () => { + void withLock([this as DraftSequenceTokenPredictor, "evaluate"], async () => { this._iterator?.return(); this._iterator = undefined; }); @@ -238,7 +238,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { this._resetAbortController.abort(); this._currentEvaluationAbortController.abort(); - void withLock(this, "evaluate", async () => { + void withLock([this as DraftSequenceTokenPredictor, "evaluate"], async () => { this._iterator?.return(); this._iterator = undefined; }); @@ -255,7 +255,7 @@ export class DraftSequenceTokenPredictor extends TokenPredictor { return; this._active = true; - void withLock(this, "evaluate", async () => { + void withLock([this as DraftSequenceTokenPredictor, "evaluate"], async () => { try { const abortSignal = this._currentEvaluationAbortController.signal; diff --git a/src/evaluator/LlamaEmbeddingContext.ts b/src/evaluator/LlamaEmbeddingContext.ts index 0e8695bc..cd91d747 100644 --- a/src/evaluator/LlamaEmbeddingContext.ts +++ b/src/evaluator/LlamaEmbeddingContext.ts @@ -96,7 +96,7 @@ export class LlamaEmbeddingContext { if (endToken != null && resolvedInput.at(-1) !== endToken) resolvedInput.push(endToken); - return await withLock(this, "evaluate", async () => { + return await withLock([this as LlamaEmbeddingContext, "evaluate"], async () => { await this._sequence.eraseContextTokenRanges([{ start: 0, end: this._sequence.nextTokenIndex diff --git a/src/evaluator/LlamaModel/LlamaModel.ts b/src/evaluator/LlamaModel/LlamaModel.ts index f53ab21a..9d7fa343 100644 --- a/src/evaluator/LlamaModel/LlamaModel.ts +++ b/src/evaluator/LlamaModel/LlamaModel.ts @@ -537,7 +537,7 @@ export class LlamaModel { if (this._vocabOnly) throw new Error("Model is loaded in vocabOnly mode, so no context can be created"); - return await withLock(this._llama._memoryLock, LlamaLocks.loadToMemory, options.createSignal, async () => { + return await withLock([this._llama._memoryLock, LlamaLocks.loadToMemory], options.createSignal, async () => { const preventDisposalHandle = this._backendModelDisposeGuard.createPreventDisposalHandle(); try { return await LlamaContext._create(options, {_model: this}); @@ -673,7 +673,7 @@ export class LlamaModel { if (this._loraAdapters.has(resolvedPath)) return this._loraAdapters.get(resolvedPath)!; - return await withLock(this._loraAdapters, "modify", async () => { + return await withLock([this._loraAdapters, "modify"], async () => { if (this._loraAdapters.has(resolvedPath)) return this._loraAdapters.get(resolvedPath)!; @@ -687,7 +687,7 @@ export class LlamaModel { /** @internal */ public async _removeLoraUsage(loraAdapters: Set) { - return await withLock(this._loraAdapters, "modify", async () => { + return await withLock([this._loraAdapters, "modify"], async () => { await Promise.all( [...loraAdapters].map(async (lora) => { lora.usages--; diff --git a/src/evaluator/LlamaRankingContext.ts b/src/evaluator/LlamaRankingContext.ts index 
8a32251e..71ee32e9 100644
--- a/src/evaluator/LlamaRankingContext.ts
+++ b/src/evaluator/LlamaRankingContext.ts
@@ -185,7 +185,7 @@ export class LlamaRankingContext {
         if (input.length === 0)
             return Promise.resolve(0);

-        return withLock(this, "evaluate", async () => {
+        return withLock([this as LlamaRankingContext, "evaluate"], async () => {
             await this._sequence.eraseContextTokenRanges([{
                 start: 0,
                 end: this._sequence.nextTokenIndex
diff --git a/src/gguf/fileReaders/GgufFsFileReader.ts b/src/gguf/fileReaders/GgufFsFileReader.ts
index d6200b6b..9080083f 100644
--- a/src/gguf/fileReaders/GgufFsFileReader.ts
+++ b/src/gguf/fileReaders/GgufFsFileReader.ts
@@ -52,7 +52,7 @@ export class GgufFsFileReader extends GgufFileReader {
     }

     private async _readToExpandBufferUpToOffset(endOffset: number, extraAllocationSize: number = defaultExtraAllocationSize) {
-        return await withLock(this, "modifyBuffer", this._signal, async () => {
+        return await withLock([this as GgufFsFileReader, "modifyBuffer"], this._signal, async () => {
             if (endOffset < this._buffer.length)
                 return;
diff --git a/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts b/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts
index 2fd2eb6f..11113420 100644
--- a/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts
+++ b/src/gguf/fileReaders/GgufNetworkFetchFileReader.ts
@@ -66,7 +66,7 @@ export class GgufNetworkFetchFileReader extends GgufFileReader {
     }

     private async _fetchToExpandBufferUpToOffset(endOffset: number, extraAllocationSize: number = defaultExtraAllocationSize) {
-        await withLock(this, "modifyBuffer", this._signal, async () => {
+        await withLock([this as GgufNetworkFetchFileReader, "modifyBuffer"], this._signal, async () => {
             if (endOffset < this._buffer.length)
                 return;
diff --git a/src/index.ts b/src/index.ts
index 6605c331..2332caaa 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,7 +4,7 @@ import {getLlama, type LlamaOptions, type LastBuildOptions} from "./bindings/getLlama.js";
 import {getLlamaGpuTypes} from "./bindings/utils/getLlamaGpuTypes.js";
 import {NoBinaryFoundError} from "./bindings/utils/NoBinaryFoundError.js";
 import {
-    type LlamaGpuType, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType
+    type LlamaGpuType, type LlamaNuma, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType
 } from "./bindings/types.js";
 import {resolveModelFile, type ResolveModelFileOptions} from "./utils/resolveModelFile.js";
 import {LlamaModel, LlamaModelInfillTokens, type LlamaModelOptions, LlamaModelTokens} from "./evaluator/LlamaModel/LlamaModel.js";
@@ -124,6 +124,7 @@ export {
     type LlamaOptions,
     type LastBuildOptions,
     type LlamaGpuType,
+    type LlamaNuma,
     type LlamaClasses,
     LlamaLogLevel,
     NoBinaryFoundError,
diff --git a/src/utils/LruCache.ts b/src/utils/LruCache.ts
index 9e224cd3..6d6b40a2 100644
--- a/src/utils/LruCache.ts
+++ b/src/utils/LruCache.ts
@@ -12,7 +12,7 @@ export class LruCache<Key, Value> {
         this._onDelete = onDelete;
     }

-    public get(key: Key) {
+    public get(key: Key): Value | undefined {
         if (!this._cache.has(key))
             return undefined;
diff --git a/src/utils/ReplHistory.ts b/src/utils/ReplHistory.ts
index 11be485a..194c1d88 100644
--- a/src/utils/ReplHistory.ts
+++ b/src/utils/ReplHistory.ts
@@ -25,7 +25,7 @@ export class ReplHistory {
             return;
         }

-        await withLock(this, "file", async () => {
+        await withLock([this as ReplHistory, "file"], async () => {
             try {
                 const json = parseReplJsonfile(await fs.readJSON(this._filePath!));
                 this._fileContent = this._addItemToHistory(line, json);
diff --git a/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts b/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts
index a78cd82a..817279ef 100644
--- a/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts
+++ b/src/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.ts
@@ -199,5 +199,8 @@ export function getGbnfJsonTerminalForGbnfJsonSchema(
     if (isGbnfJsonBasicSchemaIncludesType(schema, "null"))
         terminals.push(new GbnfNull());

+    if (terminals.length === 0)
+        terminals.push(new GbnfNull());
+
     return new GbnfOr(terminals);
 }
diff --git a/src/utils/isLockfileActive.ts b/src/utils/isLockfileActive.ts
index 8aae5645..f9b36b7a 100644
--- a/src/utils/isLockfileActive.ts
+++ b/src/utils/isLockfileActive.ts
@@ -7,12 +7,12 @@ export async function isLockfileActive({
 }: {
     resourcePath: string, staleDuration?: number
 }) {
-    if (isLockActive(lockfileLockScope, resourcePath))
+    if (isLockActive([lockfileLockScope, resourcePath]))
         return true;

     const lockfileActive = await lockfile.check(resourcePath, {stale: staleDuration, realpath: false});
     if (lockfileActive)
         return true;

-    return isLockActive(lockfileLockScope, resourcePath);
+    return isLockActive([lockfileLockScope, resourcePath]);
 }
diff --git a/src/utils/utilTypes.ts b/src/utils/utilTypes.ts
index b7b96191..53211fd5 100644
--- a/src/utils/utilTypes.ts
+++ b/src/utils/utilTypes.ts
@@ -2,6 +2,16 @@ export type Writable<T> = {
     -readonly [P in keyof T]: T[P];
 };

+/**
+ * Omit all the keys from `Value` that are not set to `true` in `Options`.
+ *
+ * For example:
+ * ```ts
+ * type Value = {a: number, b: string, c: boolean};
+ * type Options = {a: true, b: false, c: true};
+ * type Result = PickOptions<Value, Options>; // {a: number, c: boolean}
+ * ```
+ */
 export type PickOptions<
     Value extends Readonly<Record<string, any>>,
     Options extends {readonly [key: string]: boolean | undefined}
diff --git a/src/utils/waitForLockfileRelease.ts b/src/utils/waitForLockfileRelease.ts
index 4cf385ae..47581bee 100644
--- a/src/utils/waitForLockfileRelease.ts
+++ b/src/utils/waitForLockfileRelease.ts
@@ -8,13 +8,13 @@ export async function waitForLockfileRelease({
 }: {
     resourcePath: string, checkInterval?: number, staleDuration?: number
 }) {
     while (true) {
-        if (isLockActive(lockfileLockScope, resourcePath)) {
-            await waitForLockRelease(lockfileLockScope, resourcePath);
+        if (isLockActive([lockfileLockScope, resourcePath])) {
+            await waitForLockRelease([lockfileLockScope, resourcePath]);
             continue;
         }

         const lockfileActive = await lockfile.check(resourcePath, {stale: staleDuration, realpath: false});
-        const lockIsActive = isLockActive(lockfileLockScope, resourcePath);
+        const lockIsActive = isLockActive([lockfileLockScope, resourcePath]);
         if (lockIsActive)
             continue;
diff --git a/src/utils/withLockfile.ts b/src/utils/withLockfile.ts
index f5a962da..09edf3cb 100644
--- a/src/utils/withLockfile.ts
+++ b/src/utils/withLockfile.ts
@@ -12,7 +12,7 @@ export async function withLockfile<T>(
     },
     callback: () => T | Promise<T>
 ): Promise<T> {
-    return await withLock(lockfileLockScope, resourcePath, async () => {
+    return await withLock([lockfileLockScope, resourcePath], async () => {
         let releaseLock: () => Promise<void>;
         let res: T;
diff --git a/templates/electron-typescript-react/electron/state/llmState.ts b/templates/electron-typescript-react/electron/state/llmState.ts
index 7aee4647..060e89bb 100644
--- a/templates/electron-typescript-react/electron/state/llmState.ts
+++ b/templates/electron-typescript-react/electron/state/llmState.ts
@@ -95,7 +95,7 @@ let 
inProgressResponse: SimplifiedModelChatItem["message"] = []; export const llmFunctions = { async loadLlama() { - await withLock(llmFunctions, "llama", async () => { + await withLock([llmFunctions, "llama"], async () => { if (llama != null) { try { await llama.dispose(); @@ -136,7 +136,7 @@ export const llmFunctions = { }); }, async loadModel(modelPath: string) { - await withLock(llmFunctions, "model", async () => { + await withLock([llmFunctions, "model"], async () => { if (llama == null) throw new Error("Llama not loaded"); @@ -198,7 +198,7 @@ export const llmFunctions = { }); }, async createContext() { - await withLock(llmFunctions, "context", async () => { + await withLock([llmFunctions, "context"], async () => { if (model == null) throw new Error("Model not loaded"); @@ -242,7 +242,7 @@ export const llmFunctions = { }); }, async createContextSequence() { - await withLock(llmFunctions, "contextSequence", async () => { + await withLock([llmFunctions, "contextSequence"], async () => { if (context == null) throw new Error("Context not loaded"); @@ -278,7 +278,7 @@ export const llmFunctions = { }, chatSession: { async createChatSession() { - await withLock(llmFunctions, "chatSession", async () => { + await withLock([llmFunctions, "chatSession"], async () => { if (contextSequence == null) throw new Error("Context sequence not loaded"); @@ -336,7 +336,7 @@ export const llmFunctions = { }); }, async prompt(message: string) { - await withLock(llmFunctions, "chatSession", async () => { + await withLock([llmFunctions, "chatSession"], async () => { if (chatSession == null) throw new Error("Chat session not loaded"); diff --git a/templates/electron-typescript-react/package.json b/templates/electron-typescript-react/package.json index df1afcf1..bac000cd 100644 --- a/templates/electron-typescript-react/package.json +++ b/templates/electron-typescript-react/package.json @@ -26,7 +26,7 @@ "birpc": "^2.3.0", "classnames": "^2.5.1", "highlight.js": "^11.11.1", - "lifecycle-utils": "^2.0.0", + "lifecycle-utils": "^3.0.1", "markdown-it": "^14.1.0", "node-llama-cpp": "file:../..", "pretty-ms": "^9.2.0", diff --git a/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx b/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx index 5ec1d9fc..9d978cf5 100644 --- a/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx +++ b/templates/electron-typescript-react/src/App/components/Header/components/UpdateBadge.tsx @@ -25,7 +25,7 @@ export function UpdateBadge({appVersion, canShowCurrentVersion}: UpdateBadgeProp const updateLatestVersionInfo = useCallback(async () => { clearTimeout(nextUpdateTimeoutRef.current); - await withLock(instanceLock.current, "updateVersion", async () => { + await withLock([instanceLock.current, "updateVersion"], async () => { clearTimeout(nextUpdateTimeoutRef.current); const latestVersion = await getLatestAvailableVersion(appVersionIsBeta ?? 
false); diff --git a/test/modelDependent/bgeReranker/rank.test.ts b/test/modelDependent/bgeReranker/rank.test.ts index bfc90c33..b82db45e 100644 --- a/test/modelDependent/bgeReranker/rank.test.ts +++ b/test/modelDependent/bgeReranker/rank.test.ts @@ -28,7 +28,7 @@ describe("bgeReranker", () => { "Cleaning the house is a good way to keep it tidy" ]; - const query = "Tell me a geographical fact"; + const query = "Tell me a nature geographical fact"; const ranks = await Promise.all( documents.map((doc) => rankingContext.rank(query, doc)) @@ -40,19 +40,19 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.01798620996209156"); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.0024726231566347743"); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ - 0.000013674009084599736, - 0.000013674009084599736, - 0.000013674009084599736, - 0.003684239899435989, - 0.000013674009084599736, - 0.01798620996209156, - 0.000013674009084599736, - 0.000013674009084599736, 0.00002039908727992137, - 0.000013674009084599736, + 0.00006772414961977023, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00003716893710288947, + 0.0024726231566347743, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00002039908727992137, + 0.00003716893710288947, ] `); }); @@ -81,7 +81,7 @@ describe("bgeReranker", () => { "Cleaning the house is a good way to keep it tidy" ]; - const query = "Tell me a geographical fact"; + const query = "Tell me a nature geographical fact"; const ranks = await rankingContext.rankAll(query, documents); @@ -91,19 +91,19 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.01798620996209156"); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.0024726231566347743"); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ - 0.000013674009084599736, - 0.000013674009084599736, - 0.000013674009084599736, - 0.003684239899435989, - 0.000013674009084599736, - 0.01798620996209156, - 0.000013674009084599736, - 0.000013674009084599736, 0.00002039908727992137, - 0.000013674009084599736, + 0.00006772414961977023, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00003716893710288947, + 0.0024726231566347743, + 0.00003716893710288947, + 0.00003716893710288947, + 0.00002039908727992137, + 0.00003716893710288947, ] `); }); @@ -130,7 +130,7 @@ describe("bgeReranker", () => { "Cleaning the house is a good way to keep it tidy" ]; - const query = "Tell me a geographical fact"; + const query = "Tell me a nature geographical fact"; const rankedDocuments = await rankingContext.rankAndSort(query, documents); @@ -141,42 +141,42 @@ describe("bgeReranker", () => { expect(simplifySortedRanks([topDocument])[0]).toMatchInlineSnapshot(` { "document": "Mount Everest is the tallest mountain in the world", - "score": 0.01798620996209156, + "score": 0.0024726231566347743, } `); expect(simplifySortedRanks(rankedDocuments)).toMatchInlineSnapshot(` [ { "document": "Mount Everest is the tallest mountain in the world", - "score": 0.01798620996209156, + "score": 0.0024726231566347743, }, { - "document": "The capital of France is Paris", - "score": 0.003684239899435989, + "document": "I love eating pizza 
with extra cheese", + "score": 0.00006772414961977023, }, { - "document": "Not all the things that shine are made of gold", - "score": 0.00002039908727992137, + "document": "The capital of France is Paris", + "score": 0.00003716893710288947, }, { - "document": "I love eating pizza with extra cheese", - "score": 0.000013674009084599736, + "document": "Dogs love to play fetch with their owners", + "score": 0.00003716893710288947, }, { - "document": "Dogs love to play fetch with their owners", - "score": 0.000013674009084599736, + "document": "Cleaning the house is a good way to keep it tidy", + "score": 0.00003716893710288947, }, { - "document": "The sky is clear and blue today", - "score": 0.000013674009084599736, + "document": "A warm cup of tea is perfect for a cold winter day", + "score": 0.00003716893710288947, }, { - "document": "Cleaning the house is a good way to keep it tidy", - "score": 0.000013674009084599736, + "document": "Not all the things that shine are made of gold", + "score": 0.00002039908727992137, }, { - "document": "A warm cup of tea is perfect for a cold winter day", - "score": 0.000013674009084599736, + "document": "The sky is clear and blue today", + "score": 0.00002039908727992137, }, ] `); @@ -196,7 +196,7 @@ function simplifySortedRanks { contextSequence: contextSequence2 }); - const res2 = await chatSession2.prompt("How much is 6+6+6"); + const res2 = await chatSession2.prompt("How much is 6+6+6?"); const tokenMeterState2 = contextSequence2.tokenMeter.getState(); expect(tokenMeterState2).to.toMatchInlineSnapshot(` { - "usedInputTokens": 82, - "usedOutputTokens": 14, + "usedInputTokens": 83, + "usedOutputTokens": 11, } `); expect(tokenMeterState2.usedInputTokens).to.be.greaterThanOrEqual(tokenMeterState.usedInputTokens); - expect(res2).to.eql("The sum of 6+6+6 is 18."); + expect(res2).to.eql("6 + 6 + 6 = 18"); }); test("reusing a context sequences utilizes existing state", {timeout: 1000 * 60 * 60 * 2}, async () => { diff --git a/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts b/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts index d8247dd9..3ccf0673 100644 --- a/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts +++ b/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts @@ -114,7 +114,7 @@ describe("functionary", () => { freeRam: s1GB * 6 }); expect(res.gpuLayers).to.eql(0); - expect(res.contextSize).to.toMatchInlineSnapshot("7718"); + expect(res.contextSize).to.toMatchInlineSnapshot("7717"); } { const res = await resolveGpuLayers(0, { @@ -255,7 +255,7 @@ describe("functionary", () => { freeRam: s1GB * 4.5 }); expect(res.gpuLayers).to.eql(16); - expect(res.contextSize).to.toMatchInlineSnapshot("4016"); + expect(res.contextSize).to.toMatchInlineSnapshot("4011"); } try { await resolveGpuLayers(16, { @@ -318,7 +318,7 @@ describe("functionary", () => { ignoreMemorySafetyChecks: true }); expect(res.gpuLayers).to.eql(0); - expect(res.contextSize).to.toMatchInlineSnapshot("7718"); + expect(res.contextSize).to.toMatchInlineSnapshot("7717"); } }); @@ -343,7 +343,7 @@ describe("functionary", () => { unifiedMemorySize: s1GB * 7.3 }); expect(res.gpuLayers).to.eql(16); - expect(res.contextSize).to.toMatchInlineSnapshot("1760"); + expect(res.contextSize).to.toMatchInlineSnapshot("1757"); } { const res = await resolveGpuLayers(16, { @@ -354,7 +354,7 @@ describe("functionary", () => { unifiedMemorySize: s1GB * 5.3 }); expect(res.gpuLayers).to.eql(16); - 
-            expect(res.contextSize).to.toMatchInlineSnapshot("5505");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5502");
         }
         try {
             await resolveGpuLayers(16, {
@@ -783,7 +783,7 @@ describe("functionary", () => {
                 llamaGpu: false
             });
             expect(res.gpuLayers).to.eql(0);
-            expect(res.contextSize).to.toMatchInlineSnapshot("7718");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7717");
         }
         {
             const res = await resolveGpuLayers(33, {
@@ -795,7 +795,7 @@ describe("functionary", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(0);
-            expect(res.contextSize).to.toMatchInlineSnapshot("7718");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7717");
         }
     });

@@ -809,7 +809,7 @@ describe("functionary", () => {
                 unifiedMemorySize: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("6251");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6248");
         }
         {
             const res = await resolveGpuLayers(33, {
@@ -820,7 +820,7 @@ describe("functionary", () => {
                 unifiedMemorySize: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("2974");
+            expect(res.contextSize).to.toMatchInlineSnapshot("2972");
         }
         {
             const res = await resolveGpuLayers(33, {
@@ -831,7 +831,7 @@ describe("functionary", () => {
                 unifiedMemorySize: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("1336");
+            expect(res.contextSize).to.toMatchInlineSnapshot("1333");
         }
         try {
             await resolveGpuLayers(33, {
@@ -908,7 +908,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 1
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("472");
+            expect(res.contextSize).to.toMatchInlineSnapshot("471");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -918,7 +918,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 1
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("898");
+            expect(res.contextSize).to.toMatchInlineSnapshot("895");
         }
     });

@@ -962,7 +962,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1062,7 +1062,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4721");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4718");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1072,7 +1072,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7998");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7995");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1125,7 +1125,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1225,7 +1225,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4721");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4718");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1235,7 +1235,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7998");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7995");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -1349,7 +1349,7 @@ describe("functionary", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4016");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4011");
         }
     });

@@ -1451,7 +1451,7 @@ describe("functionary", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4016");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4011");
         }
     });
 });
@@ -1480,7 +1480,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-            expect(res.contextSize).to.toMatchInlineSnapshot("6535");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6531");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -1492,7 +1492,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -1569,7 +1569,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 7
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-            expect(res.contextSize).to.toMatchInlineSnapshot("6535");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6531");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -1581,7 +1581,7 @@ describe("functionary", () => {
                 freeRam: s1GB * 7
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7483");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7471");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
diff --git a/test/modelDependent/functionary/gguf/ggufInsights.test.ts b/test/modelDependent/functionary/gguf/ggufInsights.test.ts
index ee193e2c..b7f22d33 100644
--- a/test/modelDependent/functionary/gguf/ggufInsights.test.ts
+++ b/test/modelDependent/functionary/gguf/ggufInsights.test.ts
@@ -92,7 +92,7 @@ describe("gguf", async () => {

         const modelVramUsageDiff = currentModelVramUsage - initialModelVramUsage;

-        const s300MB = 300 * Math.pow(1024, 2);
+        const s330MB = 330 * Math.pow(1024, 2);
         const s5MB = 5 * Math.pow(1024, 2);

         const estimatedModelResourceUsage = ggufInsights.estimateModelResourceRequirements({
@@ -100,7 +100,7 @@ describe("gguf", async () => {
         });
         expect(toBytes(estimatedModelResourceUsage.gpuVram)).toMatchInlineSnapshot('"4.06GB"');
         expect(toBytes(estimatedModelResourceUsage.cpuRam)).toMatchInlineSnapshot('"281.81MB"');
-        expect(Math.abs(modelVramUsageDiff - estimatedModelResourceUsage.gpuVram)).to.be.lte(s300MB);
+        expect(Math.abs(modelVramUsageDiff - estimatedModelResourceUsage.gpuVram)).to.be.lte(s330MB);

         const modelEstimationDiffWithActual = estimatedModelResourceUsage.gpuVram + estimatedModelResourceUsage.cpuRam - model.size;
         expect(Math.abs(modelEstimationDiffWithActual)).to.be.lte(s5MB); // tolerate such a small difference
@@ -125,7 +125,7 @@ describe("gguf", async () => {
             modelGpuLayers: ggufInsights.totalLayers
         }).gpuVram;
         expect(toBytes(estimatedContextVramUsage)).toMatchInlineSnapshot("\"1.03GB\"");
-        expect(Math.abs(contextVramUsageDiff - estimatedContextVramUsage)).to.be.lte(s300MB);
+        expect(Math.abs(contextVramUsageDiff - estimatedContextVramUsage)).to.be.lte(s330MB);

         await model.dispose();
     });
@@ -190,7 +190,7 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "643.07MB",
+            "cpuRam": "643.08MB",
            "gpuVram": "0B",
          }
        `);
@@ -201,7 +201,7 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "451.07MB",
+            "cpuRam": "451.08MB",
            "gpuVram": "0B",
          }
        `);
@@ -214,7 +214,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "1.71GB",
-            "gpuVram": "355.25MB",
+            "gpuVram": "355.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -224,8 +224,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "1002.8MB",
-            "gpuVram": "315.25MB",
+            "cpuRam": "1002.82MB",
+            "gpuVram": "315.75MB",
          }
        `);
@@ -235,8 +235,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "630.8MB",
-            "gpuVram": "295.25MB",
+            "cpuRam": "630.82MB",
+            "gpuVram": "295.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -246,8 +246,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "444.8MB",
-            "gpuVram": "285.25MB",
+            "cpuRam": "444.82MB",
+            "gpuVram": "285.75MB",
          }
        `);

@@ -258,7 +258,7 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "1022.78MB",
+            "cpuRam": "1022.79MB",
            "gpuVram": "1.05GB",
          }
        `);
@@ -269,8 +269,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "638.78MB",
-            "gpuVram": "679.25MB",
+            "cpuRam": "638.79MB",
+            "gpuVram": "679.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -280,8 +280,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "446.78MB",
-            "gpuVram": "479.25MB",
+            "cpuRam": "446.79MB",
+            "gpuVram": "479.75MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -291,8 +291,8 @@ describe("gguf", async () => {
             batchSize: 512
         }))).toMatchInlineSnapshot(`
          {
-            "cpuRam": "350.78MB",
-            "gpuVram": "379.25MB",
+            "cpuRam": "350.79MB",
+            "gpuVram": "379.75MB",
          }
        `);

@@ -326,7 +326,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "667.52MB",
+            "gpuVram": "668.02MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -337,7 +337,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "475.52MB",
+            "gpuVram": "476.02MB",
          }
        `);

@@ -371,7 +371,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "667.52MB",
+            "gpuVram": "668.02MB",
          }
        `);
         expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -382,7 +382,7 @@ describe("gguf", async () => {
         }))).toMatchInlineSnapshot(`
          {
            "cpuRam": "250.5MB",
-            "gpuVram": "475.52MB",
+            "gpuVram": "476.02MB",
          }
        `);
     });
diff --git a/test/modelDependent/llama3.1/chunkDocument.test.ts b/test/modelDependent/llama3.1/chunkDocument.test.ts
index 4484b3c2..6af8a6f0 100644
--- a/test/modelDependent/llama3.1/chunkDocument.test.ts
+++ b/test/modelDependent/llama3.1/chunkDocument.test.ts
@@ -28,7 +28,7 @@ const exampleParagraph = [

 describe("llama 3.1", () => {
     describe("chunk document", () => {
-        test("DraftModelTokenPredictor", {timeout: 1000 * 60 * 60 * 2}, async () => {
+        test("basic usage", {timeout: 1000 * 60 * 60 * 2}, async () => {
             const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
             const llama = await getTestLlama();

diff --git a/test/modelDependent/llama3.1/controlledEvaluate.test.ts b/test/modelDependent/llama3.1/controlledEvaluate.test.ts
index c9dcde17..3145c84f 100644
--- a/test/modelDependent/llama3.1/controlledEvaluate.test.ts
+++ b/test/modelDependent/llama3.1/controlledEvaluate.test.ts
@@ -96,73 +96,73 @@ describe("llama 3.1", () => {
            {
              "next": {
                "probabilities": Map {
-                  35308 => 0.5214946,
-                  27096 => 0.2432059,
-                  11 => 0.0221824,
-                  198 => 0.0119446,
-                  374 => 0.0083614,
-                  863 => 0.0083608,
-                  1131 => 0.0068347,
-                  25 => 0.0062433,
-                  7940 => 0.0054039,
-                  1 => 0.0051688,
+                  35308 => 0.5214539,
+                  27096 => 0.2432189,
+                  11 => 0.0221867,
+                  198 => 0.0119489,
+                  374 => 0.0083635,
+                  863 => 0.0083618,
+                  1131 => 0.0068354,
+                  25 => 0.0062467,
+                  7940 => 0.0054025,
+                  320 => 0.0051706,
                },
              },
            },
            {
              "next": {
                "probabilities": Map {
-                  927 => 0.9811904,
-                  198 => 0.0033849,
-                  6288 => 0.0032705,
+                  927 => 0.9811952,
+                  198 => 0.0033833,
+                  6288 => 0.00327,
                  279 => 0.0006553,
-                  1633 => 0.0003184,
-                  1035 => 0.0003114,
-                  13 => 0.0002917,
-                  264 => 0.0002895,
+                  1633 => 0.0003185,
+                  1035 => 0.0003111,
+                  13 => 0.0002916,
+                  264 => 0.0002894,
                  297 => 0.0002833,
-                  720 => 0.000249,
+                  720 => 0.0002489,
                },
                "token": 927,
              },
            },
            {
              "next": {
-                "confidence": 0.9306729,
+                "confidence": 0.930688,
                "token": 279,
              },
            },
            {
              "next": {
-                "confidence": 0.9597685,
+                "confidence": 0.9597747,
                "probabilities": Map {
-                  16053 => 0.9597685,
-                  1208 => 0.0047506,
-                  198 => 0.0031827,
-                  5679 => 0.0029162,
-                  65536 => 0.0019724,
-                  6435 => 0.0009124,
-                  2697 => 0.0006706,
+                  16053 => 0.9597747,
+                  1208 => 0.0047502,
+                  198 => 0.0031807,
+                  5679 => 0.0029171,
+                  65536 => 0.0019718,
+                  6435 => 0.0009126,
+                  2697 => 0.0006707,
                  720 => 0.0005979,
-                  21811 => 0.0005517,
-                  45363 => 0.0005495,
+                  21811 => 0.0005516,
+                  45363 => 0.0005494,
                },
              },
            },
            {
              "next": {
-                "confidence": 0.987146,
+                "confidence": 0.9871562,
                "probabilities": Map {
-                  5679 => 0.987146,
-                  21811 => 0.0014387,
-                  198 => 0.0009368,
-                  8415 => 0.0007225,
-                  12875 => 0.0003803,
-                  4194 => 0.000347,
-                  720 => 0.0002815,
-                  14588 => 0.0002761,
-                  9522 => 0.0002417,
-                  627 => 0.0002042,
+                  5679 => 0.9871562,
+                  21811 => 0.0014367,
+                  198 => 0.0009356,
+                  8415 => 0.0007227,
+                  12875 => 0.0003802,
+                  4194 => 0.0003468,
+                  720 => 0.0002813,
+                  14588 => 0.000276,
+                  9522 => 0.0002415,
+                  627 => 0.0002041,
                },
                "token": 5679,
              },
diff --git a/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts b/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
index 2bc659f5..d8142a4b 100644
--- a/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
+++ b/test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
@@ -98,45 +98,45 @@ describe("llama 3.1", () => {
          [
            {
              "probabilities": Map {
-                578 => 0.4307292,
-                1115 => 0.1304377,
-                1102 => 0.0516853,
-                763 => 0.042889,
-                1283 => 0.029397,
-                2100 => 0.0293787,
-                15636 => 0.0262684,
-                2030 => 0.021849,
-                320 => 0.016903,
-                1628 => 0.0118695,
+                578 => 0.4307095,
+                1115 => 0.1304636,
+                1102 => 0.0516819,
+                763 => 0.0428933,
+                1283 => 0.0293915,
+                2100 => 0.0293782,
+                15636 => 0.0262626,
+                2030 => 0.0218519,
+                320 => 0.0169018,
+                1628 => 0.0118644,
              },
              "token": 578,
            },
            {
              "probabilities": Map {
-                16053 => 0.4222992,
-                4062 => 0.3036339,
-                39935 => 0.0603973,
-                2944 => 0.0373043,
-                5679 => 0.0238118,
-                11914 => 0.0162981,
-                2144 => 0.0146835,
-                1121 => 0.0069849,
-                17571 => 0.0057944,
-                3446 => 0.0049346,
+                16053 => 0.4223687,
+                4062 => 0.303549,
+                39935 => 0.0603321,
+                2944 => 0.0373496,
+                5679 => 0.0237923,
+                11914 => 0.0163001,
+                2144 => 0.0146822,
+                1121 => 0.0069893,
+                17571 => 0.0057973,
+                3446 => 0.0049349,
              },
              "token": 16053,
            },
            {
              "probabilities": Map {
-                5679 => 0.9981185,
-                12875 => 0.0001592,
+                5679 => 0.9981177,
+                12875 => 0.0001593,
                18964 => 0.0001154,
-                39935 => 0.000115,
-                13 => 0.0001049,
+                39935 => 0.0001149,
+                13 => 0.000105,
                627 => 0.0000928,
-                656 => 0.0000625,
+                656 => 0.0000626,
                893 => 0.0000563,
-                198 => 0.0000522,
+                198 => 0.0000523,
                374 => 0.0000519,
              },
              "token": 5679,
@@ -144,56 +144,56 @@ describe("llama 3.1", () => {
            {
              "probabilities": Map {
                374 => 0.8126541,
-                1587 => 0.0481526,
-                596 => 0.0247368,
-                1120 => 0.0223041,
-                3250 => 0.0215465,
-                706 => 0.0161833,
-                15849 => 0.0086943,
-                1053 => 0.0059125,
-                55064 => 0.0037811,
+                1587 => 0.0481505,
+                596 => 0.0247274,
+                1120 => 0.022311,
+                3250 => 0.0215521,
+                706 => 0.0161821,
+                15849 => 0.0086956,
+                1053 => 0.0059156,
+                55064 => 0.0037815,
                11 => 0.0036657,
              },
              "token": 374,
            },
            {
              "probabilities": Map {
-                2288 => 0.2757553,
-                1120 => 0.1666547,
-                539 => 0.1577473,
-                779 => 0.133445,
-                264 => 0.0558533,
-                1101 => 0.0292142,
-                16053 => 0.0176781,
-                5042 => 0.015864,
-                1193 => 0.014582,
-                2744 => 0.0140904,
+                2288 => 0.2758818,
+                1120 => 0.1666409,
+                539 => 0.1577165,
+                779 => 0.1333762,
+                264 => 0.0558459,
+                1101 => 0.029207,
+                16053 => 0.0176698,
+                5042 => 0.0158617,
+                1193 => 0.0145808,
+                2744 => 0.0140919,
              },
              "token": 2288,
            },
            {
              "probabilities": Map {
-                16053 => 0.9065909,
-                13326 => 0.0636439,
-                19781 => 0.007158,
-                17551 => 0.0020244,
-                10968 => 0.0012683,
-                11920 => 0.0011008,
-                6435 => 0.0010087,
-                34386 => 0.0007758,
-                1208 => 0.0006099,
-                25366 => 0.0005672,
+                16053 => 0.9066046,
+                13326 => 0.0636245,
+                19781 => 0.007155,
+                17551 => 0.0020255,
+                10968 => 0.0012684,
+                11920 => 0.001101,
+                6435 => 0.001009,
+                34386 => 0.0007755,
+                1208 => 0.00061,
+                25366 => 0.0005675,
              },
              "token": 16053,
            },
            {
              "probabilities": Map {
-                311 => 0.988279,
-                1524 => 0.0061858,
-                11 => 0.0025774,
+                311 => 0.9882814,
+                1524 => 0.0061828,
+                11 => 0.0025772,
                323 => 0.0005243,
                13 => 0.0003535,
-                627 => 0.0003211,
+                627 => 0.0003212,
                1606 => 0.0002642,
                2288 => 0.0002583,
                369 => 0.0001247,
@@ -203,46 +203,46 @@
            },
            {
              "probabilities": Map {
-                2512 => 0.7492248,
-                1524 => 0.0989405,
-                656 => 0.032411,
-                636 => 0.0240648,
-                7940 => 0.0144123,
-                33586 => 0.0108691,
-                387 => 0.0086826,
-                1781 => 0.0058571,
-                1629 => 0.005489,
-                3351 => 0.0051125,
+                2512 => 0.7492506,
+                1524 => 0.0989418,
+                656 => 0.032397,
+                636 => 0.0240763,
+                7940 => 0.0143969,
+                33586 => 0.01087,
+                387 => 0.0086808,
+                1781 => 0.0058532,
+                1629 => 0.0054883,
+                3351 => 0.0051112,
              },
              "token": 2512,
            },
            {
              "probabilities": Map {
-                922 => 0.9521815,
-                1606 => 0.015013,
-                11 => 0.014011,
-                430 => 0.0029686,
-                627 => 0.002315,
-                13 => 0.0018864,
-                1524 => 0.0018013,
+                922 => 0.9521582,
+                1606 => 0.0150241,
+                11 => 0.0140157,
+                430 => 0.002969,
+                627 => 0.0023168,
+                13 => 0.0018882,
+                1524 => 0.0018011,
                369 => 0.0017693,
-                323 => 0.0009247,
-                382 => 0.0008479,
+                323 => 0.0009252,
+                382 => 0.0008483,
              },
              "token": 922,
            },
            {
              "probabilities": Map {
-                279 => 0.6508148,
-                4205 => 0.3128796,
-                1148 => 0.0113661,
-                1690 => 0.004425,
-                904 => 0.0030377,
-                1202 => 0.0026803,
-                264 => 0.0011171,
-                1790 => 0.001086,
-                813 => 0.0010579,
-                1524 => 0.0007699,
+                279 => 0.6508825,
+                4205 => 0.3128083,
+                1148 => 0.0113708,
+                1690 => 0.0044266,
+                904 => 0.0030378,
+                1202 => 0.0026779,
+                264 => 0.001117,
+                1790 => 0.0010864,
+                813 => 0.0010572,
+                1524 => 0.0007698,
              },
              "token": 279,
            },
@@ -280,15 +280,15 @@ describe("llama 3.1", () => {
         expect(res).toMatchInlineSnapshot(`
          [
            {
-              "confidence": 0.4307292,
+              "confidence": 0.4307095,
              "token": 578,
            },
            {
-              "confidence": 0.4222992,
+              "confidence": 0.4223687,
              "token": 16053,
            },
            {
-              "confidence": 0.9981185,
+              "confidence": 0.9981177,
              "token": 5679,
            },
            {
@@ -296,27 +296,27 @@ describe("llama 3.1", () => {
              "token": 374,
            },
            {
-              "confidence": 0.2757553,
+              "confidence": 0.2758818,
              "token": 2288,
            },
            {
-              "confidence": 0.9065909,
+              "confidence": 0.9066046,
              "token": 16053,
            },
            {
-              "confidence": 0.988279,
+              "confidence": 0.9882814,
              "token": 311,
            },
            {
-              "confidence": 0.7492248,
+              "confidence": 0.7492506,
              "token": 2512,
            },
            {
-              "confidence": 0.9521815,
+              "confidence": 0.9521582,
              "token": 922,
            },
            {
-              "confidence": 0.6508148,
+              "confidence": 0.6508825,
              "token": 279,
            },
          ]
@@ -353,49 +353,49 @@ describe("llama 3.1", () => {
         expect(res).toMatchInlineSnapshot(`
          [
            {
-              "confidence": 0.4307292,
+              "confidence": 0.4307095,
              "probabilities": Map {
-                578 => 0.4307292,
-                1115 => 0.1304377,
-                1102 => 0.0516853,
-                763 => 0.042889,
-                1283 => 0.029397,
-                2100 => 0.0293787,
-                15636 => 0.0262684,
-                2030 => 0.021849,
-                320 => 0.016903,
-                1628 => 0.0118695,
+                578 => 0.4307095,
+                1115 => 0.1304636,
+                1102 => 0.0516819,
+                763 => 0.0428933,
+                1283 => 0.0293915,
+                2100 => 0.0293782,
+                15636 => 0.0262626,
+                2030 => 0.0218519,
+                320 => 0.0169018,
+                1628 => 0.0118644,
              },
              "token": 578,
            },
            {
-              "confidence": 0.4222992,
+              "confidence": 0.4223687,
              "probabilities": Map {
-                16053 => 0.4222992,
-                4062 => 0.3036339,
-                39935 => 0.0603973,
-                2944 => 0.0373043,
-                5679 => 0.0238118,
-                11914 => 0.0162981,
-                2144 => 0.0146835,
-                1121 => 0.0069849,
-                17571 => 0.0057944,
-                3446 => 0.0049346,
+                16053 => 0.4223687,
+                4062 => 0.303549,
+                39935 => 0.0603321,
+                2944 => 0.0373496,
+                5679 => 0.0237923,
+                11914 => 0.0163001,
+                2144 => 0.0146822,
+                1121 => 0.0069893,
+                17571 => 0.0057973,
+                3446 => 0.0049349,
              },
              "token": 16053,
            },
            {
-              "confidence": 0.9981185,
+              "confidence": 0.9981177,
              "probabilities": Map {
-                5679 => 0.9981185,
-                12875 => 0.0001592,
+                5679 => 0.9981177,
+                12875 => 0.0001593,
                18964 => 0.0001154,
-                39935 => 0.000115,
-                13 => 0.0001049,
+                39935 => 0.0001149,
+                13 => 0.000105,
                627 => 0.0000928,
-                656 => 0.0000625,
+                656 => 0.0000626,
                893 => 0.0000563,
-                198 => 0.0000522,
+                198 => 0.0000523,
                374 => 0.0000519,
              },
              "token": 5679,
@@ -404,59 +404,59 @@
              "confidence": 0.8126541,
              "probabilities": Map {
                374 => 0.8126541,
-                1587 => 0.0481526,
-                596 => 0.0247368,
-                1120 => 0.0223041,
-                3250 => 0.0215465,
-                706 => 0.0161833,
-                15849 => 0.0086943,
-                1053 => 0.0059125,
-                55064 => 0.0037811,
+                1587 => 0.0481505,
+                596 => 0.0247274,
+                1120 => 0.022311,
+                3250 => 0.0215521,
+                706 => 0.0161821,
+                15849 => 0.0086956,
+                1053 => 0.0059156,
+                55064 => 0.0037815,
                11 => 0.0036657,
              },
              "token": 374,
            },
            {
-              "confidence": 0.2757553,
+              "confidence": 0.2758818,
              "probabilities": Map {
-                2288 => 0.2757553,
-                1120 => 0.1666547,
-                539 => 0.1577473,
-                779 => 0.133445,
-                264 => 0.0558533,
-                1101 => 0.0292142,
-                16053 => 0.0176781,
-                5042 => 0.015864,
-                1193 => 0.014582,
-                2744 => 0.0140904,
+                2288 => 0.2758818,
+                1120 => 0.1666409,
+                539 => 0.1577165,
+                779 => 0.1333762,
+                264 => 0.0558459,
+                1101 => 0.029207,
+                16053 => 0.0176698,
+                5042 => 0.0158617,
+                1193 => 0.0145808,
+                2744 => 0.0140919,
              },
              "token": 2288,
            },
            {
-              "confidence": 0.9065909,
+              "confidence": 0.9066046,
              "probabilities": Map {
-                16053 => 0.9065909,
-                13326 => 0.0636439,
-                19781 => 0.007158,
-                17551 => 0.0020244,
-                10968 => 0.0012683,
-                11920 => 0.0011008,
-                6435 => 0.0010087,
-                34386 => 0.0007758,
-                1208 => 0.0006099,
-                25366 => 0.0005672,
+                16053 => 0.9066046,
+                13326 => 0.0636245,
+                19781 => 0.007155,
+                17551 => 0.0020255,
+                10968 => 0.0012684,
+                11920 => 0.001101,
+                6435 => 0.001009,
+                34386 => 0.0007755,
+                1208 => 0.00061,
+                25366 => 0.0005675,
              },
              "token": 16053,
            },
            {
-              "confidence": 0.988279,
+              "confidence": 0.9882814,
              "probabilities": Map {
-                311 => 0.988279,
-                1524 => 0.0061858,
-                11 => 0.0025774,
+                311 => 0.9882814,
+                1524 => 0.0061828,
+                11 => 0.0025772,
                323 => 0.0005243,
                13 => 0.0003535,
-                627 => 0.0003211,
+                627 => 0.0003212,
                1606 => 0.0002642,
                2288 => 0.0002583,
                369 => 0.0001247,
@@ -465,50 +465,50 @@
              "token": 311,
            },
            {
-              "confidence": 0.7492248,
+              "confidence": 0.7492506,
              "probabilities": Map {
-                2512 => 0.7492248,
-                1524 => 0.0989405,
-                656 => 0.032411,
-                636 => 0.0240648,
-                7940 => 0.0144123,
-                33586 => 0.0108691,
-                387 => 0.0086826,
-                1781 => 0.0058571,
-                1629 => 0.005489,
-                3351 => 0.0051125,
+                2512 => 0.7492506,
+                1524 => 0.0989418,
+                656 => 0.032397,
+                636 => 0.0240763,
+                7940 => 0.0143969,
+                33586 => 0.01087,
+                387 => 0.0086808,
+                1781 => 0.0058532,
+                1629 => 0.0054883,
+                3351 => 0.0051112,
              },
              "token": 2512,
            },
            {
-              "confidence": 0.9521815,
+              "confidence": 0.9521582,
              "probabilities": Map {
-                922 => 0.9521815,
-                1606 => 0.015013,
-                11 => 0.014011,
-                430 => 0.0029686,
-                627 => 0.002315,
-                13 => 0.0018864,
-                1524 => 0.0018013,
+                922 => 0.9521582,
+                1606 => 0.0150241,
+                11 => 0.0140157,
+                430 => 0.002969,
+                627 => 0.0023168,
+                13 => 0.0018882,
+                1524 => 0.0018011,
                369 => 0.0017693,
-                323 => 0.0009247,
-                382 => 0.0008479,
+                323 => 0.0009252,
+                382 => 0.0008483,
              },
              "token": 922,
            },
            {
-              "confidence": 0.6508148,
+              "confidence": 0.6508825,
              "probabilities": Map {
-                279 => 0.6508148,
-                4205 => 0.3128796,
-                1148 => 0.0113661,
-                1690 => 0.004425,
-                904 => 0.0030377,
-                1202 => 0.0026803,
-                264 => 0.0011171,
-                1790 => 0.001086,
-                813 => 0.0010579,
-                1524 => 0.0007699,
+                279 => 0.6508825,
+                4205 => 0.3128083,
+                1148 => 0.0113708,
+                1690 => 0.0044266,
+                904 => 0.0030378,
+                1202 => 0.0026779,
+                264 => 0.001117,
+                1790 => 0.0010864,
+                813 => 0.0010572,
+                1524 => 0.0007698,
              },
              "token": 279,
            },
diff --git a/test/modelDependent/llama3.2/promptCompletion.test.ts b/test/modelDependent/llama3.2/promptCompletion.test.ts
index 9362f25e..a0db42f2 100644
--- a/test/modelDependent/llama3.2/promptCompletion.test.ts
+++ b/test/modelDependent/llama3.2/promptCompletion.test.ts
@@ -43,7 +43,7 @@ describe("llama 3.2", () => {
         const promptCompletion = await chatSession.completePrompt("Hi there!", {
             maxTokens: 11
         });
-        expect(promptCompletion).toMatchInlineSnapshot(`" I'm looking for a new phone case. I need"`);
+        expect(promptCompletion).toMatchInlineSnapshot("\" I'm looking for a new phone case. I need\"");
         expect(LlamaText.fromTokens(model.tokenizer, chatSession.sequence.contextTokens)).toMatchInlineSnapshot(`
             LlamaText([
                 new SpecialToken("BOS"),
diff --git a/test/modelDependent/qwen3-0.6b/functions.test.ts b/test/modelDependent/qwen3-0.6b/functions.test.ts
index 07bd2899..46bd9f2f 100644
--- a/test/modelDependent/qwen3-0.6b/functions.test.ts
+++ b/test/modelDependent/qwen3-0.6b/functions.test.ts
@@ -198,7 +198,7 @@ describe("qwen3 0.6b", () => {

         const res2 = await chatSession.prompt([
             "The owner has 3 apps: App1, App2, and App3.",
-            "Notify the owner with a main notifications about 'apps time', with sub notifications for each app with the app's name.",
+            "Notify the owner with a main notifications about 'apps time', with a sub-notification for each app with the app's name.",
             "Under each app sub-notification add a sub-notification with the app's number."
         ].join("\n"), {
             ...promptOptions,
diff --git a/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts b/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts
index c2ad773f..39a722b0 100644
--- a/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts
+++ b/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts
@@ -111,7 +111,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3
             });
             expect(res.gpuLayers).to.eql(16);
-            expect(res.contextSize).to.toMatchInlineSnapshot("8064");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8061");
         }
         try {
             await resolveGpuLayers(16, {
@@ -142,7 +142,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(16);
-            expect(res.contextSize).to.toMatchInlineSnapshot("138");
+            expect(res.contextSize).to.toMatchInlineSnapshot("136");
         }


@@ -174,7 +174,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(32);
-            expect(res.contextSize).to.toMatchInlineSnapshot("11348");
+            expect(res.contextSize).to.toMatchInlineSnapshot("11347");
         }
         try {
             await resolveGpuLayers(32, {
@@ -192,7 +192,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(32);
-            expect(res.contextSize).to.toMatchInlineSnapshot("48");
+            expect(res.contextSize).to.toMatchInlineSnapshot("47");
         }

         {
@@ -223,7 +223,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 6
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("11348");
+            expect(res.contextSize).to.toMatchInlineSnapshot("11347");
         }
         try {
             await resolveGpuLayers(33, {
@@ -241,7 +241,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
            });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("48");
+            expect(res.contextSize).to.toMatchInlineSnapshot("47");
         }

         {
@@ -303,7 +303,7 @@ describe("stableCode", () => {
                 ignoreMemorySafetyChecks: true
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("48");
+            expect(res.contextSize).to.toMatchInlineSnapshot("47");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -311,7 +311,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -319,7 +319,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.4
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("6979");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6978");
         }
         {
             const res = await resolveGpuLayers("max", {
@@ -327,7 +327,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.8
             });
             expect(res.gpuLayers).to.eql(33);
-            expect(res.contextSize).to.toMatchInlineSnapshot("8072");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8070");
         }
     });

@@ -346,7 +346,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 0.4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("1");
-            expect(res.contextSize).to.toMatchInlineSnapshot("10864");
+            expect(res.contextSize).to.toMatchInlineSnapshot("10841");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -362,7 +362,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 1.4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("5");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8368");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8361");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -370,7 +370,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 2.4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("1518");
+            expect(res.contextSize).to.toMatchInlineSnapshot("1517");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -378,7 +378,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.1
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("3429");
+            expect(res.contextSize).to.toMatchInlineSnapshot("3428");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -386,7 +386,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.3
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("3976");
+            expect(res.contextSize).to.toMatchInlineSnapshot("3974");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -394,7 +394,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("4522");
+            expect(res.contextSize).to.toMatchInlineSnapshot("4520");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -402,7 +402,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 3.8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5341");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5340");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -410,7 +410,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -418,7 +418,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.3
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("6706");
+            expect(res.contextSize).to.toMatchInlineSnapshot("6705");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -426,7 +426,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.5
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("7252");
+            expect(res.contextSize).to.toMatchInlineSnapshot("7251");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -434,7 +434,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4.8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8072");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8070");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -442,7 +442,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 5.2
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("9164");
+            expect(res.contextSize).to.toMatchInlineSnapshot("9163");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -450,7 +450,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 5.8
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("10802");
+            expect(res.contextSize).to.toMatchInlineSnapshot("10801");
         }
         {
             const res = await resolveGpuLayers("auto", {
@@ -458,7 +458,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 6
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("11348");
+            expect(res.contextSize).to.toMatchInlineSnapshot("11347");
         }
     });

@@ -504,7 +504,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.eql(16);
-            expect(res.contextSize).to.toMatchInlineSnapshot("13255");
+            expect(res.contextSize).to.toMatchInlineSnapshot("13252");
         }
         try {
             await resolveGpuLayers({min: 16}, {
@@ -522,7 +522,7 @@ describe("stableCode", () => {
             });
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
         }
         {
             const res = await resolveGpuLayers({min: 16, max: 24}, {
@@ -532,7 +532,7 @@ describe("stableCode", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("23");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8249");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8248");
         }
         {
             const res = await resolveGpuLayers({min: 16, max: 24}, {
@@ -542,7 +542,7 @@ describe("stableCode", () => {
             expect(res.gpuLayers).to.be.gte(16);
             expect(res.gpuLayers).to.be.lte(24);
             expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-            expect(res.contextSize).to.toMatchInlineSnapshot("8064");
+            expect(res.contextSize).to.toMatchInlineSnapshot("8061");
         }
     });

@@ -565,7 +565,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("33");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5887");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5886");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -575,7 +575,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 1
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("3");
-            expect(res.contextSize).to.toMatchInlineSnapshot("5933");
+            expect(res.contextSize).to.toMatchInlineSnapshot("5921");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {
@@ -585,7 +585,7 @@ describe("stableCode", () => {
                 freeVram: s1GB * 4
             });
             expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-            expect(res.contextSize).to.toMatchInlineSnapshot("9208");
+            expect(res.contextSize).to.toMatchInlineSnapshot("9206");
             expect(res.contextSize).to.be.gte(contextSize);
         }
         {