Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
aa5b6e0
feat: extract function calling syntax from the jinja template, QwenCh…
giladgd Mar 20, 2025
4f3b017
fix: limit the context size by default in the node-typescript template
giladgd Mar 20, 2025
1887300
docs: style fix
giladgd Mar 20, 2025
46f0922
docs: troubleshooting function calling when using `JinjaTemplateChatW…
giladgd Mar 20, 2025
f7dae11
docs: fix the command to scaffold a new Electron project
giladgd Mar 20, 2025
d4e1603
fix: adapt to breaking `llama.cpp` changes
giladgd Mar 20, 2025
c1610b8
docs: fix types
giladgd Mar 20, 2025
df96282
fix: better handling for function calling with empty parameters
giladgd Mar 20, 2025
4c1567c
feat: export a `llama` instance getter on a model instance
giladgd Mar 20, 2025
b2bdacc
fix: small optimization
giladgd Mar 20, 2025
a8cc9c0
test: make LlamaText snapshots more readable
giladgd Mar 20, 2025
f89d98a
test: add tests
giladgd Mar 20, 2025
6f043ec
docs: update ID
giladgd Mar 20, 2025
d880b1d
feat: add QwQ 32B to recommended models
giladgd Mar 20, 2025
9a38355
build: run tests on ubuntu arm machine
giladgd Mar 21, 2025
1af81b2
build: change machine type used to run tests on
giladgd Mar 21, 2025
e7c176c
build: change machine type used to run tests on
giladgd Mar 21, 2025
441a8ce
build: make tests run correctly
giladgd Mar 21, 2025
93a41a7
build: cache models on test fail
giladgd Mar 21, 2025
f0e7f0a
build: dispose the llama instance before a test worker exits
giladgd Mar 22, 2025
388542b
chore: update vitest
giladgd Mar 22, 2025
845736a
fix: better husky setup
giladgd Mar 22, 2025
15590c3
test: fix hook
giladgd Mar 22, 2025
fb4c4df
fix: reranking edge case crash
giladgd Mar 22, 2025
dcf795d
fix: prepare script
giladgd Mar 22, 2025
c5ce43e
fix: vitest config
giladgd Mar 22, 2025
817c820
test: remove dispose hook
giladgd Mar 22, 2025
ff8170f
fix: bump min nodejs version to 20
giladgd Mar 22, 2025
6cd8d95
docs: debugging native crash on Linux
giladgd Mar 22, 2025
b9726e6
chore: update modules
giladgd Mar 22, 2025
a8dff21
fix: log level
giladgd Mar 22, 2025
6ccbca3
chore: configure eslint
giladgd Mar 22, 2025
c3c0066
fix: `defineChatSessionFunction` type
giladgd Mar 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/guide/electron.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Trying to use `node-llama-cpp` on a renderer process will crash the application.

You can scaffold an example Electron app that uses `node-llama-cpp` with complete configuration for packaging and distribution by running the following command:
```shell
npm create node-llama-cpp@latest --template electron-typescript-react
npm create node-llama-cpp@latest -- --template electron-typescript-react
```

::: tip
Expand Down
39 changes: 39 additions & 0 deletions docs/guide/function-calling.md
Original file line number Diff line number Diff line change
Expand Up @@ -408,3 +408,42 @@ getFruitPrice({name: "banana"}) result: {name: "banana", price: "$4"};


```

## Troubleshooting {#troubleshooting}
### Function Calling Issues With [`JinjaTemplateChatWrapper`](../api/classes/JinjaTemplateChatWrapper.md) {#troubleshoot-jinja-function-calling-issues}
If function calling doesn't work well (or at all) with a model you're trying to use,
and the [chat wrapper](./chat-wrapper.md) used by your [`LlamaChatSession`](../api/classes/LlamaChatSession.md)
is a [`JinjaTemplateChatWrapper`](../api/classes/JinjaTemplateChatWrapper.md)
(you can check that by accessing [`.chatWrapper`](../api/classes/LlamaChatSession.md#chatwrapper)),
you can try to force it not to use the function calling template defined in the Jinja template.

Doing this can help you achieve better function calling performance with some models.

To do this, create your [`LlamaChatSession`](../api/classes/LlamaChatSession.md) like this:
```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
});
const context = await model.createContext();

// ---cut---
import {LlamaChatSession, resolveChatWrapper} from "node-llama-cpp";

const session = new LlamaChatSession({
contextSequence: context.getSequence(),
chatWrapper: resolveChatWrapper(model, {
customWrapperSettings: {
jinjaTemplate: {
functionCallMessageTemplate: "noJinja"
}
}
})
});
```
4 changes: 4 additions & 0 deletions docs/public/giscus/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ body, #__next {
border-start-start-radius: 0;
}

.gsc-comment .gsc-replies {
padding-top: 0;
}

.gsc-reactions-popover {
border-radius: 12px;

Expand Down
6 changes: 3 additions & 3 deletions llama/addon/AddonContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {

int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();

bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
bool result = llama_kv_self_seq_rm(ctx, sequenceId, -1, -1);

if (!result) {
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
Expand All @@ -602,7 +602,7 @@ Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo&
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();

bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
bool result = llama_kv_self_seq_rm(ctx, sequenceId, startPos, endPos);

return Napi::Boolean::New(info.Env(), result);
}
Expand All @@ -617,7 +617,7 @@ Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();

llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
llama_kv_self_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);

return info.Env().Undefined();
}
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@
"catai",
"mistral",
"deepseek",
"qwen",
"qwq",
"typescript",
"lora",
"batching",
Expand Down
46 changes: 24 additions & 22 deletions src/ChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {ChatModelFunctionsDocumentationGenerator} from "./chatWrappers/utils/Cha
import {jsonDumps} from "./chatWrappers/utils/jsonDumps.js";
import {defaultChatSystemPrompt} from "./config.js";
import {getChatWrapperSegmentDefinition} from "./utils/getChatWrapperSegmentDefinition.js";
import type {JinjaTemplateChatWrapperOptions} from "./chatWrappers/generic/JinjaTemplateChatWrapper.js";

export abstract class ChatWrapper {
public static defaultSettings: ChatWrapperSettings = {
Expand All @@ -17,13 +18,15 @@ export abstract class ChatWrapper {
optionalPrefixSpace: true,
prefix: "||call: ",
paramsPrefix: LlamaText(new SpecialTokensText("(")),
suffix: LlamaText(new SpecialTokensText(")"))
suffix: LlamaText(new SpecialTokensText(")")),
emptyCallParamsPlaceholder: ""
},
result: {
prefix: LlamaText(new SpecialTokensText("\n"), "||result: "),
suffix: LlamaText(new SpecialTokensText("\n"))
}
}
},
segments: {}
};

public abstract readonly wrapperName: string;
Expand Down Expand Up @@ -102,13 +105,16 @@ export abstract class ChatWrapper {
}

public generateFunctionCall(name: string, params: any): LlamaText {
const emptyCallParamsPlaceholder = this.settings.functions.call.emptyCallParamsPlaceholder;
return LlamaText([
this.settings.functions.call.prefix,
name,
this.settings.functions.call.paramsPrefix,
(
params === undefined
? ""
? (emptyCallParamsPlaceholder === undefined || emptyCallParamsPlaceholder === "")
? ""
: jsonDumps(emptyCallParamsPlaceholder)
: jsonDumps(params)
),
this.settings.functions.call.suffix
Expand Down Expand Up @@ -139,7 +145,7 @@ export abstract class ChatWrapper {
]);
}

public generateModelResponseText(modelResponse: ChatModelResponse["response"], useRawCall: boolean = true): LlamaText {
public generateModelResponseText(modelResponse: ChatModelResponse["response"], useRawValues: boolean = true): LlamaText {
const res: LlamaText[] = [];
const pendingFunctionCalls: ChatModelFunctionCall[] = [];
const segmentStack: ChatModelSegmentType[] = [];
Expand All @@ -150,7 +156,7 @@ export abstract class ChatWrapper {
if (pendingFunctionCalls.length === 0)
return;

res.push(this.generateFunctionCallsAndResults(pendingFunctionCalls, useRawCall));
res.push(this.generateFunctionCallsAndResults(pendingFunctionCalls, useRawValues));
pendingFunctionCalls.length = 0;
needsToAddSegmentReminder = true;
};
Expand Down Expand Up @@ -180,11 +186,10 @@ export abstract class ChatWrapper {
} else if (isChatModelResponseSegment(response)) {
addFunctionCalls();

if (response.raw != null && useRawCall)
const segmentDefinition = getChatWrapperSegmentDefinition(this.settings, response.segmentType);
if (response.raw != null && useRawValues)
res.push(LlamaText.fromJSON(response.raw));
else {
const segmentDefinition = getChatWrapperSegmentDefinition(this.settings, response.segmentType);

else
res.push(
LlamaText([
(segmentStack.length > 0 && segmentStack.at(-1) === response.segmentType)
Expand All @@ -197,12 +202,15 @@ export abstract class ChatWrapper {
])
);

lastSegmentEndedWithoutSuffix = response.ended && segmentDefinition?.suffix == null;
lastSegmentEndedWithoutSuffix = response.ended && segmentDefinition?.suffix == null;

if (!response.ended && segmentStack.at(-1) !== response.segmentType)
segmentStack.push(response.segmentType);
else if (response.ended && segmentStack.at(-1) === response.segmentType) {
segmentStack.pop();

if (!response.ended)
segmentStack.push(response.segmentType);
else if (segmentStack.at(-1) === response.segmentType)
segmentStack.pop();
if (segmentStack.length === 0 && segmentDefinition?.suffix == null && this.settings.segments?.closeAllSegments != null)
res.push(LlamaText(this.settings.segments.closeAllSegments));
}

continue;
Expand Down Expand Up @@ -277,9 +285,7 @@ export abstract class ChatWrapper {
}

/** @internal */
public static _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): (
Array<Record<string | symbol, any> | [testConfig: Record<string | symbol, any>, applyConfig: Record<string | symbol, any>]>
) {
public static _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [{}] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
}

Expand All @@ -293,13 +299,9 @@ type FirstItemOfTupleOrFallback<T extends any[], Fallback> = T extends [infer U,

export type ChatWrapperJinjaMatchConfiguration<T extends typeof ChatWrapper> = Array<
FirstItemOfTupleOrFallback<ConstructorParameters<T>, object> |
[
testConfig: FirstItemOfTupleOrFallback<ConstructorParameters<T>, object>,
applyConfig: FirstItemOfTupleOrFallback<ConstructorParameters<T>, object>
] |
[
testConfig: FirstItemOfTupleOrFallback<ConstructorParameters<T>, object>,
applyConfig: FirstItemOfTupleOrFallback<ConstructorParameters<T>, object>,
testJinjaParameters: Record<string, any>
testJinjaChatWrapperOptions?: JinjaTemplateChatWrapperOptions
]
>;
4 changes: 2 additions & 2 deletions src/chatWrappers/AlpacaChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ export class AlpacaChatWrapper extends GeneralChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{},
{allowSpecialTokensInTitles: true}
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
];
}
}
16 changes: 8 additions & 8 deletions src/chatWrappers/DeepSeekChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -348,14 +348,14 @@ export class DeepSeekChatWrapper extends ChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{},
{keepOnlyLastThought: true},
{functionCallingSyntax: "simplified"},
{functionCallingSyntax: "simplified", keepOnlyLastThought: true},
{functionCallingSyntax: "original"},
{functionCallingSyntax: "original", keepOnlyLastThought: true}
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
[undefined, {}, {functionCallMessageTemplate: "noJinja"}],
[undefined, {keepOnlyLastThought: true}, {functionCallMessageTemplate: "noJinja"}],
[undefined, {functionCallingSyntax: "simplified"}, {functionCallMessageTemplate: "noJinja"}],
[undefined, {functionCallingSyntax: "simplified", keepOnlyLastThought: true}, {functionCallMessageTemplate: "noJinja"}],
[undefined, {functionCallingSyntax: "original"}, {functionCallMessageTemplate: "noJinja"}],
[undefined, {functionCallingSyntax: "original", keepOnlyLastThought: true}, {functionCallMessageTemplate: "noJinja"}]
];
}
}
4 changes: 2 additions & 2 deletions src/chatWrappers/FalconChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,10 @@ export class FalconChatWrapper extends ChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{},
{allowSpecialTokensInTitles: true}
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
];
}
}
6 changes: 3 additions & 3 deletions src/chatWrappers/FunctionaryChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ export class FunctionaryChatWrapper extends ChatWrapper {
: LlamaText([
new SpecialTokensText(">>>all\n"),
response,
(isLastItem && isLastResponse)
(!isLastResponse || isLastItem)
? LlamaText([])
: new SpecialToken("EOT")
])
Expand Down Expand Up @@ -726,12 +726,12 @@ export class FunctionaryChatWrapper extends ChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{variation: "v3"},
{variation: "v2.llama3"},
{variation: "v2"}
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
];
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/chatWrappers/GeneralChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,10 @@ export class GeneralChatWrapper extends ChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{},
{allowSpecialTokensInTitles: true}
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
];
}
}
4 changes: 2 additions & 2 deletions src/chatWrappers/Llama2ChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ export class Llama2ChatWrapper extends ChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{addSpaceBeforeEos: false},
{addSpaceBeforeEos: true}
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
];
}
}
32 changes: 21 additions & 11 deletions src/chatWrappers/Llama3_1ChatWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -341,15 +341,19 @@ export class Llama3_1ChatWrapper extends ChatWrapper {
}

/** @internal */
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
return [
{},
[{todayDate: null}, {}],
[{cuttingKnowledgeDate: null}, {}],
[{noToolInstructions: true}, {}],
[{todayDate: null, cuttingKnowledgeDate: null}, {}],
[{todayDate: null, cuttingKnowledgeDate: null, noToolInstructions: true}, {}],
[{todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: null, noToolInstructions: true}, {}],
[{}, undefined, {functionCallMessageTemplate: "noJinja"}],
[{todayDate: null}, {}, {functionCallMessageTemplate: "noJinja"}],
[{cuttingKnowledgeDate: null}, {}, {functionCallMessageTemplate: "noJinja"}],
[{noToolInstructions: true}, {}, {functionCallMessageTemplate: "noJinja"}],
[{todayDate: null, cuttingKnowledgeDate: null}, {}, {functionCallMessageTemplate: "noJinja"}],
[{todayDate: null, cuttingKnowledgeDate: null, noToolInstructions: true}, {}, {functionCallMessageTemplate: "noJinja"}],
[
{todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: null, noToolInstructions: true},
{},
{functionCallMessageTemplate: "noJinja"}
],

[
{
Expand All @@ -358,7 +362,10 @@ export class Llama3_1ChatWrapper extends ChatWrapper {
noToolInstructions: true
},
{cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z")},
{"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)}
{
additionalRenderParameters: {"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)},
functionCallMessageTemplate: "noJinja"
}
],

[
Expand All @@ -369,9 +376,12 @@ export class Llama3_1ChatWrapper extends ChatWrapper {
_specialTokensTextForPreamble: true
},
{cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z")},
{"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)}
{
additionalRenderParameters: {"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)},
functionCallMessageTemplate: "noJinja"
}
]
] satisfies ChatWrapperJinjaMatchConfiguration<typeof this>;
];
}
}

Expand Down
Loading
Loading