
Commit 314d7e8

fix: metadata string encoding (#420)
* fix: metadata string encoding
* fix: Vulkan parallel decoding
* fix: try auth token on 401 response
1 parent 86e1bee commit 314d7e8

File tree

6 files changed: +43 -38 lines changed


docs/guide/external-chat-state.md

Lines changed: 4 additions & 4 deletions
@@ -9,9 +9,9 @@ You can [save and restore a chat history](./chat-session.md#save-and-restore) on
 :::
 
 To interact with a model in a chat form, you can use [`LlamaChatSession`](../api/classes/LlamaChatSession.md),
-which is stateful chat session that manages the chat state on its own.
+which is a stateful chat session that manages the chat state on its own.
 
-When building a library around `node-llama-cpp`, you may want to store that chat state externally and control the evaluations on your own.
+When building a library around `node-llama-cpp`, you may want to store that chat state externally and control the evaluations yourself.
 
 This is where [`LlamaChat`](../api/classes/LlamaChat.md) may come in handy.
 [`LlamaChat`](../api/classes/LlamaChat.md) allows you to generate a completion to an existing chat session and manage the evaluation yourself,
@@ -69,9 +69,9 @@ const res = await llamaChat.generateResponse(chatHistory, {
 console.log("AI: " + res.response);
 ```
 
-Now, let say we want to ask the model a follow-up question based on the previous response.
+Now, let's say we want to ask the model a follow-up question based on the previous response.
 Since we already have a context sequence loaded with the previous chat history,
-we'd want to use it as much a possible.
+we'd want to reuse it as much as possible.
 
 To do so, we pass the context window of the previous evaluation output to the new evaluation.
 This is important, since if a context shift has happened, we want to use the existing post-context-shift context sequence state
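
The follow-up flow described in this doc change can be sketched roughly as follows. This is a sketch, not an authoritative API listing: the `lastEvaluation` fields and the `lastEvaluationContextWindow` option shape are assumptions based on the guide's surrounding code.

```ts
import type {ChatHistoryItem} from "node-llama-cpp";

// `llamaChat` and `res` are assumed to come from the guide's previous snippet
const newHistory: ChatHistoryItem[] = [
    ...res.lastEvaluation.cleanHistory,
    {type: "user", text: "And how does that compare to the alternatives?"},
    {type: "model", response: []}
];

// Pass the previous evaluation's context window so the already-loaded
// context sequence state (including post-context-shift state) is reused
const res2 = await llamaChat.generateResponse(newHistory, {
    lastEvaluationContextWindow: {
        history: res.lastEvaluation.contextWindow,
        minimumOverlapPercentageToPreventContextShift: 0.5
    }
});
console.log("AI: " + res2.response);
```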

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 11 additions & 1 deletion
@@ -1,4 +1,4 @@
-import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock} from "lifecycle-utils";
+import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, Lock, withLock} from "lifecycle-utils";
 import {removeNullFields} from "../../utils/removeNullFields.js";
 import {Token} from "../../types.js";
 import {AddonContext, AddonModelLora, BatchLogitIndex} from "../../bindings/AddonTypes.js";
@@ -32,6 +32,10 @@ const defaultFailedCreationRemedy = {
 } as const satisfies Required<LlamaContextOptions["failedCreationRemedy"]>;
 const defaultEvaluationPriority: EvaluationPriority = 5;
 
+const decodeSyncWorkaround = {
+    vulkanLock: {}
+};
+
 export class LlamaContext {
     /** @internal */ public readonly _llama: Llama;
     /** @internal */ public readonly _ctx: AddonContext;
@@ -573,11 +577,17 @@ export class LlamaContext {
                 return;
             }
 
+            let decodeLock: Lock | undefined;
+            // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel
+            if (this._llama.gpu === "vulkan")
+                decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode");
+
             try {
                 await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
 
                 shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
             } finally {
+                decodeLock?.dispose();
                 preventDisposalHandle.dispose();
             }
         }
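
The added lines serialize decodes whenever the Vulkan backend is active: every decode first acquires a module-wide lock keyed on the shared `decodeSyncWorkaround.vulkanLock` object and releases it in the `finally` block, so at most one decode runs at a time. A minimal standalone sketch of the same pattern, with an illustrative `runDecode` callback standing in for the real batch decode:

```ts
import {acquireLock, Lock} from "lifecycle-utils";

// Module-wide scope object; locks acquired on it with the same key are serialized
const vulkanLock = {};

async function decodeWithVulkanWorkaround(isVulkan: boolean, runDecode: () => Promise<void>) {
    let decodeLock: Lock | undefined;

    // Only the Vulkan backend needs serialization; others keep decoding in parallel
    if (isVulkan)
        decodeLock = await acquireLock(vulkanLock, "decode");

    try {
        await runDecode();
    } finally {
        // Release the lock so the next queued decode can proceed
        decodeLock?.dispose();
    }
}
```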

src/gguf/fileReaders/GgufFileReader.ts

Lines changed: 1 addition & 6 deletions
@@ -99,12 +99,7 @@ export abstract class GgufFileReader {
         const readLength = valueTypeToBytesToRead.uint8 * length;
 
         return this._withBufferRead(offset, readLength, (resolvedOffset) => {
-            const res: string[] = [];
-
-            for (let i = resolvedOffset; i < resolvedOffset + readLength && i < this._buffer.length; i++)
-                res.push(String.fromCharCode(this._buffer[i]!));
-
-            return res.join("");
+            return this._buffer.toString("utf8", resolvedOffset, Math.min(resolvedOffset + readLength, this._buffer.length));
         });
     }

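This is the commit's headline fix: the old code decoded each byte separately with `String.fromCharCode`, which treats bytes as Latin-1 code points and mangles any multi-byte UTF-8 character, whereas `buffer.toString("utf8", start, end)` decodes the whole range correctly. A standalone demonstration (not library code) using the `Ġ` character that appears in the snapshot updates below:

```ts
// "Ġ" (U+0120) encodes to two bytes in UTF-8: 0xC4 0xA0
const buffer = Buffer.from("Ġ Ġ", "utf8");

// Old approach: one JS character per byte (effectively Latin-1 decoding)
let broken = "";
for (let i = 0; i < buffer.length; i++)
    broken += String.fromCharCode(buffer[i]!);

// New approach: decode the byte range as UTF-8 in one call
const fixed = buffer.toString("utf8", 0, buffer.length);

console.log(broken); // "Ä\u00A0 Ä\u00A0" (the "Ä " mojibake seen in the old snapshots)
console.log(fixed);  // "Ġ Ġ"
```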
src/gguf/fileReaders/GgufNetworkFetchFileReader.ts

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ export class GgufNetworkFetchFileReader extends GgufFileReader {
                 signal: this._signal
             });
 
-            if ((response.status >= 500 || response.status === 429) && headersToTry.length > 0)
+            if ((response.status >= 500 || response.status === 429 || response.status === 401) && headersToTry.length > 0)
                 continue;
 
             if (!response.ok)
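
From the visible context, this fetch sits in a loop that works through successive header candidates (e.g. anonymous first, then with an auth token); the change makes a 401 advance to the next candidate the same way 429 and 5xx responses already did. A rough sketch of that retry shape; `headersToTry` and the function name here are assumptions based on the hunk, not the file's actual structure:

```ts
async function fetchWithHeaderFallback(url: string, headersToTry: Array<Record<string, string>>): Promise<Response> {
    while (headersToTry.length > 0) {
        const headers = headersToTry.shift()!;
        const response = await fetch(url, {headers});

        // On server errors, rate limiting, and now also 401 (unauthorized), fall
        // through to the next header candidate (e.g. retry with an auth token)
        if ((response.status >= 500 || response.status === 429 || response.status === 401) && headersToTry.length > 0)
            continue;

        return response;
    }

    throw new Error("no header candidates to try");
}
```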

test/modelDependent/functionary/gguf/__snapshots__/ggufParser.test.ts.snap

Lines changed: 18 additions & 18 deletions
@@ -114,16 +114,16 @@ exports[`gguf > parser > should fetch GGUF metadata 1`] = `
       "bos_token_id": 128000,
       "eos_token_id": 128001,
       "merges": [
-        "Ä  Ä ",
-        "Ä  Ä Ä Ä ",
-        "Ä Ä  Ä Ä ",
-        "Ä Ä Ä  Ä ",
+        "Ġ Ġ",
+        "Ġ ĠĠĠ",
+        "ĠĠ ĠĠ",
+        "ĠĠĠ Ġ",
         "i n",
-        "Ä  t",
-        "Ä  Ä Ä Ä Ä Ä Ä Ä ",
-        "Ä Ä  Ä Ä Ä Ä Ä Ä ",
-        "Ä Ä Ä Ä  Ä Ä Ä Ä ",
-        "Ä Ä Ä  Ä Ä Ä Ä Ä ",
+        "Ġ t",
+        "Ġ ĠĠĠĠĠĠĠ",
+        "ĠĠ ĠĠĠĠĠĠ",
+        "ĠĠĠĠ ĠĠĠĠ",
+        "ĠĠĠ ĠĠĠĠĠ",
       ],
       "model": "gpt2",
       "padding_token_id": 128001,
@@ -325,16 +325,16 @@ exports[`gguf > parser > should parse local gguf model 1`] = `
       "bos_token_id": 128000,
       "eos_token_id": 128001,
       "merges": [
-        "Ä  Ä ",
-        "Ä  Ä Ä Ä ",
-        "Ä Ä  Ä Ä ",
-        "Ä Ä Ä  Ä ",
+        "Ġ Ġ",
+        "Ġ ĠĠĠ",
+        "ĠĠ ĠĠ",
+        "ĠĠĠ Ġ",
         "i n",
-        "Ä  t",
-        "Ä  Ä Ä Ä Ä Ä Ä Ä ",
-        "Ä Ä  Ä Ä Ä Ä Ä Ä ",
-        "Ä Ä Ä Ä  Ä Ä Ä Ä ",
-        "Ä Ä Ä  Ä Ä Ä Ä Ä ",
+        "Ġ t",
+        "Ġ ĠĠĠĠĠĠĠ",
+        "ĠĠ ĠĠĠĠĠĠ",
+        "ĠĠĠĠ ĠĠĠĠ",
+        "ĠĠĠ ĠĠĠĠĠ",
       ],
       "model": "gpt2",
       "padding_token_id": 128001,

test/standalone/gguf/__snapshots__/ggufStandaloneParser.test.ts.snap

Lines changed: 8 additions & 8 deletions
@@ -83,15 +83,15 @@ exports[`gguf > parser > should parse remote gguf model 1`] = `
     "ggml": {
       "eos_token_id": 11,
       "merges": [
-        "Ä  t",
-        "Ä  a",
+        "Ġ t",
+        "Ġ a",
         "i n",
         "h e",
         "r e",
         "o n",
         "e r",
-        "Ä  s",
-        "Ä t he",
+        "Ġ s",
+        "Ġt he",
         "a t",
       ],
       "model": "gpt2",
@@ -229,15 +229,15 @@ exports[`gguf > parser > should parse remote gguf model without tensor info 1`]
     "ggml": {
       "eos_token_id": 11,
       "merges": [
-        "Ä  t",
-        "Ä  a",
+        "Ġ t",
+        "Ġ a",
         "i n",
         "h e",
         "r e",
         "o n",
         "e r",
-        "Ä  s",
-        "Ä t he",
+        "Ġ s",
+        "Ġt he",
         "a t",
       ],
       "model": "gpt2",
