Commit 5a70576

feat: use the best compute layer available by default (#175)

* feat: detect the available compute layers on the system and use the best one by default
* feat: more guardrails to not load an incompatible prebuilt binary, to prevent process crashes due to linux distro differences
* feat: improve logs as to why system-related issues occur and how to fix them
* feat: `inspect` command
* feat: add `GemmaChatWrapper`
* feat: `TemplateChatWrapper` - easier method to create simple chat wrappers, see the type docs for more info
* fix: adapt to `llama.cpp` breaking change
* fix: when a specific compute layer is requested, fail the build if it is not found
* fix: return user-defined llama tokens
* docs: update more docs to prepare for version 3.0
1 parent bcaab4f commit 5a70576

59 files changed, +3327 -434 lines
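
The headline change is automatic compute-layer selection. As a quick orientation before the diffs, here is a minimal sketch of how that surfaces in the API — the `getLlama()` and `gpu: "vulkan"` calls are taken from the Vulkan guide added in this commit, and the comments summarize the commit message:

```typescript
import {getLlama} from "node-llama-cpp";

// With no options, the compute layers available on the system are detected
// and the best one is used by default.
const llama = await getLlama();

// When a specific compute layer is requested and it is not available,
// loading now fails instead of silently falling back.
const vulkanLlama = await getLlama({
    gpu: "vulkan"
});
```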

.config/typedoc.json

Lines changed: 4 additions & 0 deletions
@@ -18,7 +18,11 @@
     "preserveAnchorCasing": true,
     "useCodeBlocks": true,
     "expandObjects": true,
+    "expandParameters": true,
     "parametersFormat": "table",
+    "propertiesFormat": "list",
+    "enumMembersFormat": "table",
+    "typeDeclarationFormat": "list",
     "hideInPageTOC": true,
     "docsRoot": "../docs"
 }

.github/workflows/build.yml

Lines changed: 12 additions & 12 deletions
@@ -198,19 +198,19 @@ jobs:
 
     // build binaries
     if (process.env.ARTIFACT_NAME === "win") {
-        await buildBinary("x64");
-        await buildBinary("x64", ["--cuda"]);
-        await buildBinary("x64", ["--vulkan"]);
-        // await buildBinary("arm64", [], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work
+        await buildBinary("x64", ["--gpu", "false"]);
+        await buildBinary("x64", ["--gpu", "cuda"]);
+        await buildBinary("x64", ["--gpu", "vulkan"]);
+        // await buildBinary("arm64", ["--gpu", "false"], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work
     } else if (process.env.ARTIFACT_NAME === "linux") {
-        await buildBinary("x64");
-        await buildBinary("x64", ["--cuda"]);
-        await buildBinary("x64", ["--vulkan"]);
-        await buildBinary("arm64");
-        await buildBinary("armv7l");
+        await buildBinary("x64", ["--gpu", "false"]);
+        await buildBinary("x64", ["--gpu", "cuda"]);
+        await buildBinary("x64", ["--gpu", "vulkan"]);
+        await buildBinary("arm64", ["--gpu", "false"]);
+        await buildBinary("armv7l", ["--gpu", "false"]);
     } else if (process.env.ARTIFACT_NAME === "mac") {
-        await buildBinary("arm64", ["--metal"]);
-        await buildBinary("x64", ["--no-metal"]);
+        await buildBinary("arm64", ["--gpu", "metal"]);
+        await buildBinary("x64", ["--gpu", "false"]);
     }
 
     // move binaries to llamaBins
@@ -275,7 +275,7 @@ jobs:
     name: Model dependent tests
     runs-on: macos-14
     env:
-      NODE_LLAMA_CPP_METAL: false
+      NODE_LLAMA_CPP_GPU: false
     needs:
       - build
     steps:

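The workflow change above reflects the new unified CLI convention: the per-backend flags (`--cuda`, `--vulkan`, `--metal`, `--no-metal`) are replaced by a single `--gpu` option, and the test environment variable is renamed from `NODE_LLAMA_CPP_METAL` to `NODE_LLAMA_CPP_GPU`. For reference, the same option used locally — the first command appears verbatim in the Vulkan guide added below, while the `--gpu false` variant is an assumption based on the values passed to `buildBinary()` in this workflow:

```bash
# build llama.cpp with Vulkan support (from the new Vulkan guide)
npx --no node-llama-cpp download --gpu vulkan

# presumably builds a CPU-only binary, mirroring the CI's ["--gpu", "false"] arguments
npx --no node-llama-cpp download --gpu false
```
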
.vitepress/config.ts

Lines changed: 45 additions & 6 deletions
@@ -23,6 +23,19 @@ const chatWrappersOrder = [
     "FalconChatWrapper"
 ] as const;
 
+const categoryOrder = [
+    "Functions",
+    "Classes",
+    "Types",
+    "Enums"
+] as const;
+
+const functionsOrder = [
+    "getLlama",
+    "defineChatSessionFunction",
+    "LlamaText"
+] as const;
+
 function resolveHref(href: string) {
     if (urlBase == null)
         return href;
@@ -149,7 +162,9 @@ export default defineConfig({
     items: [
         {text: "Building from source", link: "/building-from-source"},
         {text: "Metal support", link: "/Metal"},
-        {text: "CUDA support", link: "/CUDA"}
+        {text: "CUDA support", link: "/CUDA"},
+        {text: "Vulkan support", link: "/vulkan"},
+        {text: "Troubleshooting", link: "/troubleshooting"}
     ]
 }, {
     text: "Contributing",
@@ -166,6 +181,9 @@ export default defineConfig({
     items: [
         {text: "Chat", link: "/chat"},
         {text: "Download", link: "/download"},
+        {text: "Complete", link: "/complete"},
+        {text: "Infill", link: "/infill"},
+        {text: "Inspect", link: "/inspect"},
         {text: "Build", link: "/build"},
         {text: "Clear", link: "/clear"}
     ]
@@ -184,6 +202,7 @@ function getApiReferenceSidebar(): typeof typedocSidebar {
         case "README":
         case "API":
             return null;
+
         case "Classes":
         case "Type Aliases":
         case "Functions":
@@ -200,7 +219,23 @@
                 delete subItem.collapsed;
 
                 return subItem;
-            })
+            });
+
+            return item;
+
+        case "Enumerations":
+            item.text = "Enums";
+
+            if (item.collapsed)
+                item.collapsed = false;
+            return item;
+
+        case "Variables":
+            item.text = "Enums";
+
+            if (item.collapsed)
+                item.collapsed = false;
+
             return item;
     }
 
@@ -214,6 +249,8 @@ function orderApiReferenceSidebar(sidebar: typeof typedocSidebar): typeof typedo
     orderTypes(sidebar);
     orderFunctions(sidebar);
 
+    sortItemsInOrder(sidebar, categoryOrder);
+
     return sidebar;
 }
 
@@ -345,18 +382,20 @@ function orderTypes(sidebar: typeof typedocSidebar) {
 }
 
 function orderFunctions(sidebar: typeof typedocSidebar) {
-    const types = sidebar.find((item) => item.text === "Functions");
+    const functions = sidebar.find((item) => item.text === "Functions");
 
-    if (types == null || !(types.items instanceof Array))
+    if (functions == null || !(functions.items instanceof Array))
         return;
 
     groupItems(
-        types.items,
+        functions.items,
         (item) => item.text === "LlamaText",
         (item) => item.text != null && ["isLlamaText", "tokenizeText"].includes(item.text)
     );
 
-    moveCollapseItemsToTheEnd(types.items);
+    sortItemsInOrder(functions.items, functionsOrder);
+
+    moveCollapseItemsToTheEnd(functions.items);
 }
 

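The `sortItemsInOrder` helper called in the diff above is not part of this commit's changes, so its implementation is not shown here. A hypothetical sketch of what such a helper might look like, assuming it only moves the items named in the `order` array to the front of the list:

```typescript
// Hypothetical sketch — not the actual implementation from the repository.
// Items whose `text` appears in `order` are moved to the front, in that order;
// all other items keep their relative positions (Array#sort is stable).
function sortItemsInOrder<T extends {text?: string}>(items: T[], order: readonly string[]): void {
    items.sort((a, b) => {
        const aIndex = a.text != null ? order.indexOf(a.text) : -1;
        const bIndex = b.text != null ? order.indexOf(b.text) : -1;

        if (aIndex < 0 && bIndex < 0)
            return 0;
        else if (aIndex < 0)
            return 1;
        else if (bIndex < 0)
            return -1;

        return aIndex - bIndex;
    });
}
```
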
.vitepress/utils/getCommandHtmlDoc.ts

Lines changed: 2 additions & 2 deletions
@@ -161,11 +161,11 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}
     }
 
     if (option.type != null) {
-        optionDescription.push(`(<code>${htmlEscape(option.type)}</code>)`);
+        optionDescription.push(`<code><span style="opacity: 0.4">(</span>${htmlEscape(option.type)}<span style="opacity: 0.4">)</span></code>`);
     }
 
     if (option.demandOption) {
-        optionDescription.push(`(<code>${htmlEscape("required")}</code>)`);
+        optionDescription.push(`<code><span style="opacity: 0.4">(</span>${htmlEscape("required")}<span style="opacity: 0.4">)</span></code>`);
     }
 
     if (option.choices != null) {

docs/guide/cli/cli.data.ts

Lines changed: 3 additions & 0 deletions
@@ -4,6 +4,7 @@ import {BuildCommand} from "../../../src/cli/commands/BuildCommand.js";
 import {ChatCommand} from "../../../src/cli/commands/ChatCommand.js";
 import {CompleteCommand} from "../../../src/cli/commands/CompleteCommand.js";
 import {InfillCommand} from "../../../src/cli/commands/InfillCommand.js";
+import {InspectCommand} from "../../../src/cli/commands/InspectCommand.js";
 import {DownloadCommand} from "../../../src/cli/commands/DownloadCommand.js";
 import {ClearCommand} from "../../../src/cli/commands/ClearCommand.js";
 import {htmlEscape} from "../../../.vitepress/utils/htmlEscape.js";
@@ -21,6 +22,7 @@ export default {
     ["chat", ChatCommand],
     ["complete", CompleteCommand],
     ["infill", InfillCommand],
+    ["inspect", InspectCommand],
     ["download", DownloadCommand],
     ["build", BuildCommand],
     ["clear", ClearCommand]
@@ -29,6 +31,7 @@ export default {
     chat: await getCommandHtmlDoc(ChatCommand),
     complete: await getCommandHtmlDoc(CompleteCommand),
     infill: await getCommandHtmlDoc(InfillCommand),
+    inspect: await getCommandHtmlDoc(InspectCommand),
     download: await getCommandHtmlDoc(DownloadCommand),
     build: await getCommandHtmlDoc(BuildCommand),
     clear: await getCommandHtmlDoc(ClearCommand)

docs/guide/cli/inspect.md

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+---
+outline: deep
+---
+# `inspect` command
+
+<script setup lang="ts">
+import {data as docs} from "./cli.data.js";
+const commandDoc = docs.inspect;
+</script>
+
+{{commandDoc.description}}
+
+## Usage
+```shell-vue
+{{commandDoc.usage}}
+```
+<div v-html="commandDoc.options"></div>

docs/guide/troubleshooting.md

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+---
+outline: [2, 3]
+---
+# Troubleshooting
+## ESM usage
+`node-llama-cpp` is an [ES module](https://nodejs.org/api/esm.html#modules-ecmascript-modules), so can only use `import` to load it and cannot use [`require`](https://nodejs.org/docs/latest-v18.x/api/esm.html#require:~:text=Using%20require%20to%20load%20an%20ES%20module%20is%20not%20supported%20because%20ES%20modules%20have%20asynchronous%20execution.%20Instead%2C%20use%20import()%20to%20load%20an%20ES%20module%20from%20a%20CommonJS%20module.).
+
+Since the Node.js ecosystem is transitioning to ESM, it's recommended to use it in your project.
+
+To do so, make sure your `package.json` file has `"type": "module"` in it.
+
+### Using in CommonJS
+If you cannot use ESM in your project, you can still use the `import` function from a CommonJS module to load `node-llama-cpp`:
+```typescript
+async function myLogic() {
+    const {getLlama} = await import("node-llama-cpp");
+}
+
+myLogic();
+```
+
+If your `tsconfig.json` is configured to transpile `import` statements into `require` function calls automatically,
+you can use this workaround to `import` `node-llama-cpp`:
+```typescript
+async function myLogic() {
+    const {getLlama} = await Function('return import("node-llama-cpp")')();
+}
+
+myLogic();
+```
+
+## Running in Termux
+In Termux, the prebuilt binaries cannot be used due to the custom linker used by it.
+
+To allow `node-llama-cpp` to build the binaries, install the required packages first:
+```bash
+pkg update
+pkg install nodejs git cmake clang libxml2
+```
+
+For Vulkan support, also install the following packages:
+```bash
+pkg install vulkan-tools vulkan-loader-android vulkan-headers vulkan-extension-layer
+```
+> Note that your device GPU may not support the required capabilities that `llama.cpp` requires, so it may not work.
+>
+> If that happens, disable Vulkan in your code or uninstall the Vulkan packages.

docs/guide/vulkan.md

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+# Using Vulkan
+> Vulkan is a low-overhead, cross-platform 3D graphics and computing API
+
+`node-llama-cpp` ships with prebuilt binaries with Vulkan support for Windows and Linux, and these are automatically used when Vulkan support is detected on your machine.
+
+**Windows:** Vulkan drivers are usually provided together with your GPU drivers, so most chances are that you don't have to install anything.
+
+**Linux:** you have to [install the Vulkan SDK](#vulkan-sdk-ubuntu).
+
+## Testing Vulkan support
+To check whether the Vulkan support works on your machine, run this command:
+```bash
+npx --no node-llama-cpp inspect gpu
+```
+
+You should see an output like this:
+```ansi
+Vulkan: available
+
+Vulkan used VRAM: 0% (64KB/21.33GB)
+Vulkan free VRAM: 99.99% (21.33GB/21.33GB)
+
+Used RAM: 97.37% (31.16GB/32GB)
+Free RAM: 2.62% (860.72MB/32GB)
+```
+
+If you see `Vulkan used VRAM` in the output, it means that Vulkan support is working on your machine.
+
+## Building `node-llama-cpp` with Vulkan support
+### Prerequisites
+* [`cmake-js` dependencies](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake)
+* [CMake](https://cmake.org/download/) 3.26 or higher (optional, recommended if you have build issues)
+* <a id="vulkan-sdk" />[Vulkan SDK](https://vulkan.lunarg.com/sdk/home):
+>
+#### Windows: [Vulkan SDK installer](https://sdk.lunarg.com/sdk/download/latest/windows/vulkan-sdk.exe) {#vulkan-sdk-windows}
+>
+#### Ubuntu {#vulkan-sdk-ubuntu}
+::: code-group
+
+```bash [Ubuntu 22.04]
+wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
+sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+sudo apt update
+sudo apt install vulkan-sdk
+```
+
+```bash [Ubuntu 20.04]
+wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list https://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
+sudo apt update
+sudo apt install vulkan-sdk
+```
+
+:::
+
+## Building from source
+When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source.
+
+Manually building from source using the [`download`](./cli/download) command is recommended for troubleshooting build issues.
+
+To manually build from source, run this command inside of your project:
+```bash
+npx --no node-llama-cpp download --gpu vulkan
+```
+
+> If `cmake` is not installed on your machine, `node-llama-cpp` will automatically download `cmake` to an internal directory and try to use it to build `llama.cpp` from source.
+
+> If you see the message `Vulkan not found` during the build process,
+> it means that the Vulkan SDK is not installed on your machine or that it is not detected by the build process.
+
+## Using `node-llama-cpp` with Vulkan
+It's recommended to use [`getLlama`](../api/functions/getLlama) without specifying a GPU type, so it'll detect the available GPU types and use the best one automatically.
+
+To do this, just use [`getLlama`](../api/functions/getLlama) without any parameters:
+```typescript
+import {getLlama} from "node-llama-cpp";
+
+const llama = await getLlama();
+```
+
+To force it to use Vulkan, you can use the [`gpu`](../api/type-aliases/LlamaOptions#gpu) option:
+```typescript
+import {getLlama} from "node-llama-cpp";
+
+const llama = await getLlama({
+    gpu: "vulkan"
+});
+```
+To configure how much layers of the model are run on the GPU, configure `gpuLayers` on `LlamaModel` in your code:
+```typescript
+const model = new LlamaModel({
+    llama,
+    modelPath,
+    gpuLayers: 64 // or any other number of layers you want
+});
+```
+
+You'll see logs like these in the console when the model loads:
+```
+llm_load_tensors: ggml ctx size = 0.09 MB
+llm_load_tensors: mem required = 41.11 MB (+ 2048.00 MB per state)
+llm_load_tensors: offloading 32 repeating layers to GPU
+llm_load_tensors: offloading non-repeating layers to GPU
+llm_load_tensors: offloading v cache to GPU
+llm_load_tensors: offloading k cache to GPU
+llm_load_tensors: offloaded 35/35 layers to GPU
+llm_load_tensors: VRAM used: 4741 MB
+```
+
+On Linux, you can monitor GPU usage with this command:
+```bash
+watch -d "npx --no node-llama-cpp inspect gpu"
+```

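To close, here are the fragments from the Vulkan guide above stitched into one self-contained sketch. The `getLlama`/`LlamaModel` usage and the `gpuLayers` option follow the snippets shown in the guide; the `LlamaModel` import and the model path are assumptions for illustration:

```typescript
import {getLlama, LlamaModel} from "node-llama-cpp";

// Force the Vulkan compute layer (omit the option to auto-detect the best one).
const llama = await getLlama({
    gpu: "vulkan"
});

// Load a model and offload layers to the GPU, as shown in the guide above.
// "path/to/model.gguf" is a placeholder — point it at a local GGUF model file.
const model = new LlamaModel({
    llama,
    modelPath: "path/to/model.gguf",
    gpuLayers: 64
});
```
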