Merged

33 commits
aa5b6e0
feat: extract function calling syntax from the jinja template, QwenCh…
giladgd Mar 20, 2025
4f3b017
fix: limit the context size by default in the node-typescript template
giladgd Mar 20, 2025
1887300
docs: style fix
giladgd Mar 20, 2025
46f0922
docs: troubleshooting function calling when using `JinjaTemplateChatW…
giladgd Mar 20, 2025
f7dae11
docs: fix the command to scaffold a new Electron project
giladgd Mar 20, 2025
d4e1603
fix: adapt to breaking `llama.cpp` changes
giladgd Mar 20, 2025
c1610b8
docs: fix types
giladgd Mar 20, 2025
df96282
fix: better handling for function calling with empty parameters
giladgd Mar 20, 2025
4c1567c
feat: export a `llama` instance getter on a model instance
giladgd Mar 20, 2025
b2bdacc
fix: small optimization
giladgd Mar 20, 2025
a8cc9c0
test: make LlamaText snapshots more readable
giladgd Mar 20, 2025
f89d98a
test: add tests
giladgd Mar 20, 2025
6f043ec
docs: update ID
giladgd Mar 20, 2025
d880b1d
feat: add QwQ 32B to recommended models
giladgd Mar 20, 2025
9a38355
build: run tests on ubuntu arm machine
giladgd Mar 21, 2025
1af81b2
build: change machine type used to run tests on
giladgd Mar 21, 2025
e7c176c
build: change machine type used to run tests on
giladgd Mar 21, 2025
441a8ce
build: make tests run correctly
giladgd Mar 21, 2025
93a41a7
build: cache models on test fail
giladgd Mar 21, 2025
f0e7f0a
build: dispose the llama instance before a test worker exits
giladgd Mar 22, 2025
388542b
chore: update vitest
giladgd Mar 22, 2025
845736a
fix: better husky setup
giladgd Mar 22, 2025
15590c3
test: fix hook
giladgd Mar 22, 2025
fb4c4df
fix: reranking edge case crash
giladgd Mar 22, 2025
dcf795d
fix: prepare script
giladgd Mar 22, 2025
c5ce43e
fix: vitest config
giladgd Mar 22, 2025
817c820
test: remove dispose hook
giladgd Mar 22, 2025
ff8170f
fix: bump min nodejs version to 20
giladgd Mar 22, 2025
6cd8d95
docs: debugging native crash on Linux
giladgd Mar 22, 2025
b9726e6
chore: update modules
giladgd Mar 22, 2025
a8dff21
fix: log level
giladgd Mar 22, 2025
6ccbca3
chore: configure eslint
giladgd Mar 22, 2025
c3c0066
fix: `defineChatSessionFunction` type
giladgd Mar 27, 2025
31 changes: 24 additions & 7 deletions .github/workflows/build.yml
@@ -188,7 +188,7 @@ jobs:

const {versions: latestNodeVersions} = await getLatestNodeVersions(Date.now() - 1000 * 60 * 60 * 24 * 14);

const nodeVersion = latestNodeVersions.get(18);
const nodeVersion = latestNodeVersions.get(20);
const windowsOnArmNodeVersion = latestNodeVersions.get(20);

if (nodeVersion == null || windowsOnArmNodeVersion == null) {
@@ -389,7 +389,7 @@ jobs:

model-dependent-tests:
name: Model dependent tests
runs-on: ubuntu-24.04
runs-on: macos-13
env:
NODE_LLAMA_CPP_GPU: false
needs:
@@ -412,10 +412,16 @@ jobs:
name: llama.cpp
path: llama

- name: Install dependencies on Ubuntu
# - name: Install dependencies on Ubuntu
# run: |
# sudo apt-get update
# sudo apt-get install ninja-build cmake

- name: Install dependencies on macOS
if: matrix.config.name == 'macOS'
run: |
sudo apt-get update
sudo apt-get install ninja-build cmake
brew install cmake ninja
alias make=cmake

- name: Install modules
run: npm ci
@@ -427,18 +433,29 @@
run: node ./dist/cli/cli.js inspect gpu

- name: Cache models
id: cache-test-models
uses: actions/cache@v4
id: cache-restore-test-models
uses: actions/cache/restore@v4
with:
path: "test/.models/**.gguf"
key: cache-test-models-${{ runner.os }}-${{ github.workflow }}

- name: Download models or ensure all models are downloaded
id: download-all-test-models
run: npm run dev:setup:downloadAllTestModels

- name: Run model dependent tests
env:
NODE_OPTIONS: "--max-old-space-size=4096"
run: npm run test:modelDependent

- name: Save cached models
id: cache-save-test-models
if: steps.download-all-test-models.outcome == 'success' && always()
uses: actions/cache/save@v4
with:
path: "test/.models/**.gguf"
key: cache-test-models-${{ runner.os }}-${{ github.workflow }}

release:
name: Release
if: needs.resolve-next-release.outputs.next-version != '' && needs.resolve-next-release.outputs.next-version != 'false'
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: "18"
node-version: "20"
- name: Install modules
run: npm ci
- name: ESLint
9 changes: 9 additions & 0 deletions docs/guide/development.md
@@ -79,6 +79,15 @@ lldb node -- ./node_modules/.bin/vite-node ./src/cli/cli.ts chat <path-to-a-mode
After it finishes loading, type `run` (or `process launch` if `run` fails) and press Enter for the execution of `node` to start.
When the process crashes, you'll get a stack trace in the terminal.

#### Finding Process Crash Stack Trace for Native Code (Linux) {#native-crash-stack-trace-linux}
To get the stack trace of a crash stemming from `llama.cpp` or the bindings, run `node` with `gdb`:
```shell
gdb --args node ./node_modules/.bin/vite-node ./src/cli/cli.ts chat <path-to-a-model-file-on-your-computer>
```

After it finishes loading, type `run` and press Enter for the execution of `node` to start.
When the process crashes, type `bt full` and press Enter to see the stack trace.

### Updating the Documentation
All the documentation is written in Markdown files in the `docs` directory.
To see the changes you made to the documentation, run the following command:
2 changes: 1 addition & 1 deletion docs/guide/electron.md
@@ -9,7 +9,7 @@ Trying to use `node-llama-cpp` on a renderer process will crash the application.

You can scaffold an example Electron app that uses `node-llama-cpp` with complete configuration for packaging and distribution by running the following command:
```shell
npm create node-llama-cpp@latest --template electron-typescript-react
npm create node-llama-cpp@latest -- --template electron-typescript-react
```

::: tip
39 changes: 39 additions & 0 deletions docs/guide/function-calling.md
@@ -408,3 +408,42 @@ getFruitPrice({name: "banana"}) result: {name: "banana", price: "$4"};


```

## Troubleshooting {#troubleshooting}
### Function Calling Issues With [`JinjaTemplateChatWrapper`](../api/classes/JinjaTemplateChatWrapper.md) {#troubleshoot-jinja-function-calling-issues}
If function calling doesn't work well (or at all) with a model you're trying to use,
and the [chat wrapper](./chat-wrapper.md) used by your [`LlamaChatSession`](../api/classes/LlamaChatSession.md)
is a [`JinjaTemplateChatWrapper`](../api/classes/JinjaTemplateChatWrapper.md)
(you can check that by accessing [`.chatWrapper`](../api/classes/LlamaChatSession.md#chatwrapper)),
you can try to force it to not use the function calling template defined in the Jinja template.

Doing this can help you achieve better function calling performance with some models.

To do this, create your [`LlamaChatSession`](../api/classes/LlamaChatSession.md) like this:
```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
});
const context = await model.createContext();

// ---cut---
import {LlamaChatSession, resolveChatWrapper} from "node-llama-cpp";

const session = new LlamaChatSession({
contextSequence: context.getSequence(),
chatWrapper: resolveChatWrapper(model, {
customWrapperSettings: {
jinjaTemplate: {
functionCallMessageTemplate: "noJinja"
}
}
})
});
```
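
A minimal usage sketch (not part of the diff) of how the session configured above could then exercise function calling, reusing the guide's `getFruitPrice` example; the prompt and the hardcoded price are illustrative only:

```typescript
import {defineChatSessionFunction} from "node-llama-cpp";

// Define a function the model can call; the schema mirrors the guide's
// `getFruitPrice` example, with the price hardcoded for illustration
const functions = {
    getFruitPrice: defineChatSessionFunction({
        description: "Get the price of a fruit",
        params: {
            type: "object",
            properties: {
                name: {
                    type: "string"
                }
            }
        },
        handler({name}) {
            return {name, price: "$4"};
        }
    })
};

// The session uses the "noJinja" function call message template configured above
const response = await session.prompt("How much does a banana cost?", {functions});
console.log(response);
```
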
4 changes: 4 additions & 0 deletions docs/public/giscus/style.css
@@ -31,6 +31,10 @@ body, #__next {
border-start-start-radius: 0;
}

.gsc-comment .gsc-replies {
padding-top: 0;
}

.gsc-reactions-popover {
border-radius: 12px;

3 changes: 2 additions & 1 deletion eslint.config.js
@@ -148,7 +148,8 @@ export default tseslint.config({
]
}],
"@stylistic/no-trailing-spaces": ["off"],
"@stylistic/no-multi-spaces": ["warn"]
"@stylistic/no-multi-spaces": ["warn"],
"@stylistic/generator-star-spacing": ["off"]
}
}, {
files: ["**/**.{,c,m}ts"],
20 changes: 11 additions & 9 deletions llama/addon/AddonContext.cpp
@@ -583,7 +583,7 @@ Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {

int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();

bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
bool result = llama_kv_self_seq_rm(ctx, sequenceId, -1, -1);

if (!result) {
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
@@ -602,7 +602,7 @@ Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo&
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();

bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
bool result = llama_kv_self_seq_rm(ctx, sequenceId, startPos, endPos);

return Napi::Boolean::New(info.Env(), result);
}
@@ -617,7 +617,7 @@ Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();

llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
llama_kv_self_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);

return info.Env().Undefined();
}
@@ -639,6 +639,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
}

int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;

if (inputTokensLength <= 0) {
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
@@ -650,15 +651,16 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
if (embeddings == NULL) {
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
}

if (embeddings == NULL) {
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (embeddings == NULL) {
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
return info.Env().Undefined();
}

Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
for (size_t i = 0; i < n_embd; ++i) {
size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
for (size_t i = 0; i < resultSize; i++) {
result[i] = embeddings[i];
}
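
A minimal TypeScript sketch (not part of the diff) of the truncation rule the new optional `maxVectorSize` argument introduces; the helper name is hypothetical and not part of the addon's or node-llama-cpp's public API:

```typescript
// Mirrors the C++ logic above: a maxVectorSize of 0 keeps the full
// n_embd-sized embedding, while any positive value caps the result
// at min(n_embd, maxVectorSize) values.
function truncateEmbedding(embedding: readonly number[], maxVectorSize: number = 0): number[] {
    const resultSize = maxVectorSize === 0
        ? embedding.length
        : Math.min(embedding.length, maxVectorSize);

    return embedding.slice(0, resultSize);
}

// Example: keep only the first 512 values of a 4096-dimensional embedding
// const shortened = truncateEmbedding(fullEmbedding, 512);
```
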
