Skip to content

Commit 2204e7a

Browse files
authored
fix: no thread limit when using a GPU (#322)
* fix: no thread limit when using a GPU
* fix: improve `defineChatSessionFunction` types and docs
* fix: format numbers printed in the CLI
* fix: disable the browser's autocomplete in the docs search
1 parent d0795c1 commit 2204e7a

File tree

14 files changed

+217
-144
lines changed

14 files changed

+217
-144
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ jobs:
698698
export DOCS_PACKAGE_VERSION="$(cat ./docsVersion.txt)"
699699
echo "Package version: $DOCS_PACKAGE_VERSION"
700700
701+
git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch
701702
npm run docs:build
702703
- name: Upload docs
703704
uses: actions/upload-artifact@v4

.github/workflows/test.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,6 @@ jobs:
4242
CI: true
4343
run: node ./dist/cli/cli.js source download --release latest --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle
4444
- name: Compile docs
45-
run: npm run docs:build
45+
run: |
46+
git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch
47+
npm run docs:build

.releaserc.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ function getDryRunResult() {
8383
}
8484

8585
const dryRunResult = getDryRunResult();
86+
console.info("Next release type", dryRunResult?.nextRelease?.type);
8687
if (dryRunResult == null || !(dryRunResult.nextRelease.type === "major" || dryRunResult.nextRelease.type === "minor"))
8788
githubPluginConfig.discussionCategoryName = false;
8889

package-lock.json

Lines changed: 115 additions & 113 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@
132132
"@resvg/resvg-js": "^2.6.2",
133133
"@semantic-release/exec": "^6.0.3",
134134
"@semantic-release/npm": "12.0.1",
135-
"@shikijs/vitepress-twoslash": "^1.17.7",
135+
"@shikijs/vitepress-twoslash": "^1.18.0",
136136
"@types/async-retry": "^1.4.8",
137137
"@types/bytes": "^3.1.4",
138138
"@types/cross-spawn": "^6.0.2",
@@ -160,13 +160,13 @@
160160
"tslib": "^2.7.0",
161161
"typedoc": "^0.26.7",
162162
"typedoc-plugin-markdown": "^4.2.7",
163-
"typedoc-plugin-mdn-links": "^3.2.12",
163+
"typedoc-plugin-mdn-links": "^3.3.0",
164164
"typedoc-vitepress-theme": "^1.0.1",
165165
"typescript": "^5.6.2",
166166
"vite-node": "^2.1.1",
167-
"vitepress": "^1.3.4",
167+
"vitepress": "1.3.4",
168168
"vitest": "^2.1.1",
169-
"zx": "^8.1.7"
169+
"zx": "^8.1.8"
170170
},
171171
"dependencies": {
172172
"@huggingface/jinja": "^0.3.1",
@@ -192,7 +192,7 @@
192192
"pretty-ms": "^9.1.0",
193193
"proper-lockfile": "^4.1.2",
194194
"semver": "^7.6.3",
195-
"simple-git": "^3.26.0",
195+
"simple-git": "^3.27.0",
196196
"slice-ansi": "^7.1.0",
197197
"stdout-update": "^4.0.1",
198198
"strip-ansi": "^7.1.0",
scripts/patches/vitepress+1.3.4.patch

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/node_modules/vitepress/dist/client/theme-default/components/VPLocalSearchBox.vue b/node_modules/vitepress/dist/client/theme-default/components/VPLocalSearchBox.vue
2+
index c8aded4..ccd5eff 100644
3+
--- a/node_modules/vitepress/dist/client/theme-default/components/VPLocalSearchBox.vue
4+
+++ b/node_modules/vitepress/dist/client/theme-default/components/VPLocalSearchBox.vue
5+
@@ -443,6 +443,7 @@ function formMarkRegex(terms: Set<string>) {
6+
:placeholder="buttonText"
7+
id="localsearch-input"
8+
aria-labelledby="localsearch-label"
9+
+ autocomplete="off"
10+
class="search-input"
11+
/>
12+
<div class="search-actions">

src/bindings/Llama.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const addonLogLevelToLlamaLogLevel: ReadonlyMap<number, LlamaLogLevel> = new Map
2424
[...LlamaLogLevelToAddonLogLevel.entries()].map(([key, value]) => [value, key])
2525
);
2626
const defaultLogLevel = 5;
27-
const defaultMinThreadSplitterThreads = 4;
27+
const defaultCPUMinThreadSplitterThreads = 4;
2828

2929
export class Llama {
3030
/** @internal */ public readonly _bindings: BindingModule;
@@ -86,7 +86,13 @@ export class Llama {
8686
this._debug = debug;
8787
this._vramOrchestrator = vramOrchestrator;
8888
this._vramPadding = vramPadding;
89-
this._threadsSplitter = new ThreadsSplitter(maxThreads ?? Math.max(defaultMinThreadSplitterThreads, this._mathCores));
89+
this._threadsSplitter = new ThreadsSplitter(
90+
maxThreads ?? (
91+
this._gpu === false
92+
? Math.max(defaultCPUMinThreadSplitterThreads, this._mathCores)
93+
: 0
94+
)
95+
);
9096

9197
this._logLevel = this._debug
9298
? LlamaLogLevel.debug
@@ -155,14 +161,16 @@ export class Llama {
155161
/**
156162
* The maximum number of threads that can be used by the Llama instance.
157163
*
158-
* Default to `cpuMathCores`.
164+
* If set to `0`, the Llama instance will have no limit on the number of threads.
165+
*
166+
* See the `maxThreads` option of `getLlama` for more information.
159167
*/
160168
public get maxThreads() {
161169
return this._threadsSplitter.maxThreads;
162170
}
163171

164172
public set maxThreads(value: number) {
165-
this._threadsSplitter.maxThreads = Math.floor(Math.max(1, value));
173+
this._threadsSplitter.maxThreads = Math.floor(Math.max(0, value));
166174
}
167175

168176
public get logLevel() {

src/bindings/getLlama.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,11 @@ export type LlamaOptions = {
114114
/**
115115
* The maximum number of threads to use for the Llama instance.
116116
*
117-
* Defaults to the number of CPU cores that are useful for math (`.cpuMathCores`), or `4`, whichever is higher.
117+
* Set to `0` to have no thread limit.
118+
*
119+
* When not using a GPU, defaults to the number of CPU cores that are useful for math (`.cpuMathCores`), or `4`, whichever is higher.
120+
*
121+
* When using a GPU, there's no limit by default.
118122
*/
119123
maxThreads?: number,
120124

@@ -174,7 +178,11 @@ export type LastBuildOptions = {
174178
/**
175179
* The maximum number of threads to use for the Llama instance.
176180
*
177-
* Defaults to the number of CPU cores that are useful for math (`.cpuMathCores`), or `4`, whichever is higher.
181+
* Set to `0` to have no thread limit.
182+
*
183+
* When not using a GPU, defaults to the number of CPU cores that are useful for math (`.cpuMathCores`), or `4`, whichever is higher.
184+
*
185+
* When using a GPU, there's no limit by default.
178186
*/
179187
maxThreads?: number,
180188

src/cli/utils/printCommonInfoLines.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,22 +72,22 @@ export async function printCommonInfoLines({
7272
value: () => toOneLine(String(model.tokens.eosString))
7373
}, {
7474
title: "Train context size",
75-
value: String(model.trainContextSize)
75+
value: model.trainContextSize.toLocaleString("en-US")
7676
}]
7777
});
7878
printInfoLine({
7979
title: "Context",
8080
padTitle: padTitle,
8181
info: [{
8282
title: "Size",
83-
value: String(context.contextSize)
83+
value: context.contextSize.toLocaleString("en-US")
8484
}, {
8585
title: "Threads",
86-
value: String(context.currentThreads)
86+
value: context.currentThreads.toLocaleString("en-US")
8787
}, {
8888
show: logBatchSize,
8989
title: "Batch size",
90-
value: bytes(context.batchSize)
90+
value: context.batchSize.toLocaleString("en-US")
9191
}, {
9292
show: context.flashAttention,
9393
title: "Flash attention",

src/evaluator/LlamaChatSession/utils/defineChatSessionFunction.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import {ChatSessionModelFunction} from "../../../types.js";
55
* Define a function that can be used by the model in a chat session, and return it.
66
*
77
* This is a helper function to facilitate defining functions with full TypeScript type information.
8+
*
9+
* The handler function can return a Promise, and the return value will be awaited before being returned to the model.
810
* @param functionDefinition
911
*/
1012
export function defineChatSessionFunction<const Params extends GbnfJsonSchema | undefined>({
@@ -13,8 +15,8 @@ export function defineChatSessionFunction<const Params extends GbnfJsonSchema |
1315
handler
1416
}: {
1517
description?: string,
16-
params?: Params,
17-
handler: (params: GbnfJsonSchemaToType<Params>) => any
18+
params?: Params & GbnfJsonSchema,
19+
handler: (params: GbnfJsonSchemaToType<Params>) => Promise<any> | any
1820
}): ChatSessionModelFunction<Params> {
1921
return {
2022
description,

0 commit comments

Comments (0)