
Commit c070e81

feat: extract function calling syntax from a Jinja template (#444)
* feat: extract function calling syntax from a Jinja template
* feat: Full support for Qwen and QwQ via `QwenChatWrapper`
* feat: export a `llama` instance getter on a model instance
* fix: better handling for function calling with empty parameters
* fix: reranking edge case crash
* fix: limit the context size by default in the node-typescript template
* fix: adapt to breaking `llama.cpp` changes
* fix: bump min nodejs version to 20
* fix: better husky setup
* fix: `defineChatSessionFunction` type
* docs: troubleshooting function calling when using `JinjaTemplateChatWrapper`
* docs: fix the command to scaffold a new Electron project
* docs: debugging a native crash on Linux
* test: make `LlamaText` snapshots more readable
* chore: update modules
1 parent ee94403 commit c070e81
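Of the changes listed above, the new `llama` instance getter on a model is the simplest to show in isolation. A minimal sketch, assuming the getter is exposed as `model.llama` (the property name comes from the commit message; the model path and setup are illustrative, borrowed from this commit's own docs examples):

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
});

// the model now exposes the `llama` instance it was loaded with,
// so code that only receives a model can still reach the shared instance
console.log(model.llama === llama); // expected: true
```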

File tree

93 files changed: +6676 / -3329 lines


.github/workflows/build.yml

Lines changed: 24 additions & 7 deletions
```diff
@@ -188,7 +188,7 @@ jobs:
 
             const {versions: latestNodeVersions} = await getLatestNodeVersions(Date.now() - 1000 * 60 * 60 * 24 * 14);
 
-            const nodeVersion = latestNodeVersions.get(18);
+            const nodeVersion = latestNodeVersions.get(20);
             const windowsOnArmNodeVersion = latestNodeVersions.get(20);
 
             if (nodeVersion == null || windowsOnArmNodeVersion == null) {
@@ -389,7 +389,7 @@ jobs:
 
   model-dependent-tests:
     name: Model dependent tests
-    runs-on: ubuntu-24.04
+    runs-on: macos-13
     env:
       NODE_LLAMA_CPP_GPU: false
     needs:
@@ -412,10 +412,16 @@ jobs:
           name: llama.cpp
           path: llama
 
-      - name: Install dependencies on Ubuntu
+      # - name: Install dependencies on Ubuntu
+      #   run: |
+      #     sudo apt-get update
+      #     sudo apt-get install ninja-build cmake
+
+      - name: Install dependencies on macOS
+        if: matrix.config.name == 'macOS'
         run: |
-          sudo apt-get update
-          sudo apt-get install ninja-build cmake
+          brew install cmake ninja
+          alias make=cmake
 
       - name: Install modules
         run: npm ci
@@ -427,18 +433,29 @@ jobs:
         run: node ./dist/cli/cli.js inspect gpu
 
       - name: Cache models
-        id: cache-test-models
-        uses: actions/cache@v4
+        id: cache-restore-test-models
+        uses: actions/cache/restore@v4
         with:
           path: "test/.models/**.gguf"
           key: cache-test-models-${{ runner.os }}-${{ github.workflow }}
 
       - name: Download models or ensure all models are downloaded
+        id: download-all-test-models
        run: npm run dev:setup:downloadAllTestModels
 
       - name: Run model dependent tests
+        env:
+          NODE_OPTIONS: "--max-old-space-size=4096"
        run: npm run test:modelDependent
 
+      - name: Save cached models
+        id: cache-save-test-models
+        if: steps.download-all-test-models.outcome == 'success' && always()
+        uses: actions/cache/save@v4
+        with:
+          path: "test/.models/**.gguf"
+          key: cache-test-models-${{ runner.os }}-${{ github.workflow }}
+
   release:
     name: Release
     if: needs.resolve-next-release.outputs.next-version != '' && needs.resolve-next-release.outputs.next-version != 'false'
```

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -15,7 +15,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v4
         with:
-          node-version: "18"
+          node-version: "20"
       - name: Install modules
         run: npm ci
       - name: ESLint
```

docs/guide/development.md

Lines changed: 9 additions & 0 deletions
````diff
@@ -79,6 +79,15 @@ lldb node -- ./node_modules/.bin/vite-node ./src/cli/cli.ts chat <path-to-a-model-file-on-your-computer>
 After it finishes loading, type `run` (or `process launch` if `run` fails) and press Enter for the execution of `node` to start.
 When the process crashes, you'll get a stack trace in the terminal.
 
+#### Finding Process Crash Stack Trace for Native Code (Linux) {#native-crash-stack-trace-linux}
+To get the stack trace of a crash stemming from `llama.cpp` or the bindings, run `node` with `gdb`:
+```shell
+gdb --args node ./node_modules/.bin/vite-node ./src/cli/cli.ts chat <path-to-a-model-file-on-your-computer>
+```
+
+After it finishes loading, type `run` and press Enter for the execution of `node` to start.
+When the process crashes, type `bt full` and press Enter to see the stack trace.
+
 ### Updating the Documentation
 All the documentation is written in Markdown files in the `docs` directory.
 To see the changes you made to the documentation, run the following command:
````

docs/guide/function-calling.md

Lines changed: 39 additions & 0 deletions
````diff
@@ -408,3 +408,42 @@ getFruitPrice({name: "banana"}) result: {name: "banana", price: "$4"};
 
 
 ```
+
+## Troubleshooting {#troubleshooting}
+### Function Calling Issues With [`JinjaTemplateChatWrapper`](../api/classes/JinjaTemplateChatWrapper.md) {#troubleshoot-jinja-function-calling-issues}
+If function calling doesn't work well (or at all) with a model you're trying to use,
+and the [chat wrapper](./chat-wrapper.md) used by your [`LlamaChatSession`](../api/classes/LlamaChatSession.md)
+is a [`JinjaTemplateChatWrapper`](../api/classes/JinjaTemplateChatWrapper.md)
+(you can check that by accessing [`.chatWrapper`](../api/classes/LlamaChatSession.md#chatwrapper)),
+you can try to force it to not use the function calling template defined in the Jinja template.
+
+Doing this can help you achieve better function calling performance with some models.
+
+To do this, create your [`LlamaChatSession`](../api/classes/LlamaChatSession.md) like this:
+```typescript
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+
+// ---cut---
+import {LlamaChatSession, resolveChatWrapper} from "node-llama-cpp";
+
+const session = new LlamaChatSession({
+    contextSequence: context.getSequence(),
+    chatWrapper: resolveChatWrapper(model, {
+        customWrapperSettings: {
+            jinjaTemplate: {
+                functionCallMessageTemplate: "noJinja"
+            }
+        }
+    })
+});
+```
````
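The commit also fixes the `defineChatSessionFunction` type. For context, here is how functions are typically defined and passed to a session; a minimal sketch modeled on the `getFruitPrice` example from this doc page, reusing the `session` created in the snippet above:

```typescript
import {defineChatSessionFunction} from "node-llama-cpp";

// a session-function definition in the style of the getFruitPrice example;
// the handler's parameter type is inferred from the `params` schema
const functions = {
    getFruitPrice: defineChatSessionFunction({
        description: "Get the price of a fruit",
        params: {
            type: "object",
            properties: {
                name: {
                    type: "string"
                }
            }
        },
        handler({name}) {
            return {name, price: "$4"};
        }
    })
};

// `session` is the LlamaChatSession from the troubleshooting snippet above
const answer = await session.prompt("How much does a banana cost?", {functions});
console.log(answer);
```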

docs/public/giscus/style.css

Lines changed: 4 additions & 0 deletions
```diff
@@ -31,6 +31,10 @@ body, #__next {
     border-start-start-radius: 0;
 }
 
+.gsc-comment .gsc-replies {
+    padding-top: 0;
+}
+
 .gsc-reactions-popover {
     border-radius: 12px;
 
```

eslint.config.js

Lines changed: 2 additions & 1 deletion
```diff
@@ -148,7 +148,8 @@ export default tseslint.config({
             ]
         }],
         "@stylistic/no-trailing-spaces": ["off"],
-        "@stylistic/no-multi-spaces": ["warn"]
+        "@stylistic/no-multi-spaces": ["warn"],
+        "@stylistic/generator-star-spacing": ["off"]
     }
 }, {
     files: ["**/**.{,c,m}ts"],
```

llama/addon/AddonContext.cpp

Lines changed: 11 additions & 9 deletions
```diff
@@ -583,7 +583,7 @@ Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
 
     int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
 
-    bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
+    bool result = llama_kv_self_seq_rm(ctx, sequenceId, -1, -1);
 
     if (!result) {
         Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
@@ -602,7 +602,7 @@ Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
     int32_t startPos = info[1].As<Napi::Number>().Int32Value();
     int32_t endPos = info[2].As<Napi::Number>().Int32Value();
 
-    bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
+    bool result = llama_kv_self_seq_rm(ctx, sequenceId, startPos, endPos);
 
     return Napi::Boolean::New(info.Env(), result);
 }
@@ -617,7 +617,7 @@ Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
     int32_t endPos = info[2].As<Napi::Number>().Int32Value();
     int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
 
-    llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
+    llama_kv_self_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
 
     return info.Env().Undefined();
 }
@@ -639,6 +639,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
     }
 
     int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
+    int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;
 
     if (inputTokensLength <= 0) {
         Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
@@ -650,15 +651,16 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
     const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
     if (embeddings == NULL) {
         embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
+    }
 
-        if (embeddings == NULL) {
-            Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
-            return info.Env().Undefined();
-        }
+    if (embeddings == NULL) {
+        Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
+        return info.Env().Undefined();
     }
 
-    Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
-    for (size_t i = 0; i < n_embd; ++i) {
+    size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
+    Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
+    for (size_t i = 0; i < resultSize; i++) {
         result[i] = embeddings[i];
     }
 
```