
Commit ee12b83

feat: merge upstream
2 parents: 881e2f6 + 1799127

37 files changed: +1494 −316 lines changed

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ description: Report a reproducible bug
 labels:
   - requires triage
   - bug
+title: "bug: "
+type: "Bug"
 body:
   - type: markdown
     attributes:

.github/ISSUE_TEMPLATE/documentation-issue.yml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ description: Documentation is unclear or otherwise insufficient.
 labels:
   - requires triage
   - documentation
+title: "docs: "
+type: "Documentation"
 body:
   - type: markdown
     attributes:

.github/ISSUE_TEMPLATE/feature-request.yml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ description: Suggest an new idea for this project
 labels:
   - requires triage
   - new feature
+title: "feat: "
+type: "Feature"
 body:
   - type: markdown
     attributes:

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -14,8 +14,10 @@ node_modules
 /.vitepress/.cache
 /test/.models
 /test/temp
+/test/.temp
 /temp
 /coverage
+/test-runner-profile
 
 /llama/compile_commands.json
 /llama/llama.cpp

docs/guide/awesome.md

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue";
 * [Manzoni](https://manzoni.app/) ([GitHub](https://github.com/gems-platforms/manzoni-app)) - a text editor running local LLMs
   <br /><DataBadge title="License" content="AGPL-3.0"/>
 
+* [Clippy](https://felixrieseberg.github.io/clippy/) ([GitHub](https://github.com/felixrieseberg/clippy)) - Clippy, resurrected from the 1990s, now with some AI
+  <br /><DataBadge title="License" content="MIT"/>
 
 ## Proprietary
 * [BashBuddy](https://bashbuddy.run) ([GitHub](https://github.com/wosherco/bashbuddy)) - write bash commands with natural language

docs/guide/chat-session.md

Lines changed: 81 additions & 0 deletions
@@ -446,6 +446,87 @@ console.log("AI: " + a2);
 ```
 :::
 
+:::: details Saving and restoring a context sequence evaluation state {#save-and-restore-with-context-sequence-state}
+You can also save and restore the context sequence evaluation state to avoid re-evaluating the chat history
+when you load it on a new context sequence.
+
+Please note that context sequence state files can get very large (109MB for only 1K tokens).
+Using this feature is only recommended when the chat history is very long and you plan to load it often,
+or when the evaluation is too slow due to hardware limitations.
+
+::: warning
+When loading a context sequence state from a file,
+always ensure that the model used to create the context sequence is exactly the same as the one used to save the state file.
+
+Loading a state file created from a different model can crash the process,
+thus you have to pass `{acceptRisk: true}` to the [`loadStateFromFile`](../api/classes/LlamaContextSequence.md#loadstatefromfile) method to use it.
+
+Use with caution.
+:::
+
+::: code-group
+```typescript [Save chat history and context sequence state]
+import {fileURLToPath} from "url";
+import path from "path";
+import fs from "fs/promises";
+import {getLlama, LlamaChatSession} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const contextSequence = context.getSequence();
+const session = new LlamaChatSession({contextSequence});
+
+
+const q1 = "Hi there, how are you?";
+console.log("User: " + q1);
+
+const a1 = await session.prompt(q1);
+console.log("AI: " + a1);
+
+const chatHistory = session.getChatHistory();// [!code highlight]
+await Promise.all([// [!code highlight]
+    contextSequence.saveStateToFile("state.bin"),// [!code highlight]
+    fs.writeFile("chatHistory.json", JSON.stringify(chatHistory), "utf8")// [!code highlight]
+]);// [!code highlight]
+```
+:::
+
+::: code-group
+```typescript [Restore chat history and context sequence state]
+import {fileURLToPath} from "url";
+import path from "path";
+import fs from "fs/promises";
+import {getLlama, LlamaChatSession} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// ---cut---
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const contextSequence = context.getSequence();
+const session = new LlamaChatSession({contextSequence});
+
+await contextSequence.loadStateFromFile("state.bin", {acceptRisk: true});// [!code highlight]
+const chatHistory = JSON.parse(await fs.readFile("chatHistory.json", "utf8"));// [!code highlight]
+session.setChatHistory(chatHistory);// [!code highlight]
+
+const q2 = "Summarize what you said";
+console.log("User: " + q2);
+
+const a2 = await session.prompt(q2);
+console.log("AI: " + a2);
+```
+:::
+
+::::
+
 ## Prompt Without Updating Chat History {#prompt-without-updating-chat-history}
 Prompt without saving the prompt to the chat history.
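
The restore example above assumes that both `chatHistory.json` and `state.bin` were written by an earlier run. Below is a minimal guarded-restore sketch built only on the APIs shown in this diff (`loadStateFromFile`, `setChatHistory`); the file names are reused from the example, and the fallback simply keeps an empty history so the session re-evaluates prompts normally:

```typescript
import fs from "fs/promises";
import {LlamaChatSession, LlamaContextSequence} from "node-llama-cpp";

// Sketch: restore a previously saved session if its files exist,
// otherwise return a fresh session on the same context sequence.
async function restoreSessionIfSaved(contextSequence: LlamaContextSequence) {
    const session = new LlamaChatSession({contextSequence});

    try {
        const chatHistory = JSON.parse(await fs.readFile("chatHistory.json", "utf8"));
        await contextSequence.loadStateFromFile("state.bin", {acceptRisk: true});
        session.setChatHistory(chatHistory);
    } catch {
        // Missing or unreadable files: keep the empty history and let
        // subsequent prompts be evaluated from scratch.
    }

    return session;
}
```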

docs/guide/low-level-api.md

Lines changed: 82 additions & 0 deletions
@@ -391,3 +391,85 @@ console.log(
     newTokens
 );
 ```
+
+### Save and Restore State {#save-and-restore-state}
+You can save the evaluation state of a context sequence to then later load it back.
+
+This is useful for avoiding the evaluation of tokens that you've already evaluated in the past.
+
+::: warning
+When loading a context sequence state from a file,
+always ensure that the model used to create the context sequence is exactly the same as the one used to save the state file.
+
+Loading a state file created from a different model can crash the process,
+thus you have to pass `{acceptRisk: true}` to the [`loadStateFromFile`](../api/classes/LlamaContextSequence.md#loadstatefromfile) method to use it.
+
+Use with caution.
+:::
+
+::: code-group
+```typescript [Save state]
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const sequence = context.getSequence();
+
+const input = "The best way to";
+const tokens = model.tokenize(input);
+await sequence.evaluateWithoutGeneratingNewTokens(tokens);
+
+console.log(
+    "Current state:",
+    model.detokenize(sequence.contextTokens, true),
+    sequence.contextTokens
+);
+
+await sequence.saveStateToFile("state.bin");// [!code highlight]
+```
+:::
+
+::: code-group
+```typescript [Load state]
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama, Token} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// ---cut---
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const sequence = context.getSequence();
+
+await sequence.loadStateFromFile("state.bin", {acceptRisk: true});// [!code highlight]
+
+console.log(
+    "Loaded state:",
+    model.detokenize(sequence.contextTokens, true),
+    sequence.contextTokens
+);
+
+const input = " find";
+const inputTokens = model.tokenize(input);
+const maxTokens = 10;
+const res: Token[] = [];
+for await (const token of sequence.evaluate(inputTokens)) {
+    res.push(token);
+
+    if (res.length >= maxTokens)
+        break;
+}
+
+console.log("Result:", model.detokenize(res));
+```
+:::
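
Since the point of this API is to skip re-evaluating tokens you have already evaluated, a natural pattern is to treat the state file as a cache: evaluate and save on the first run, then load on later runs. A rough sketch under the same assumptions as the examples above (same model and `state.bin` path; probing for the file via `fs.stat` is just one way to check whether it exists):

```typescript
import {fileURLToPath} from "url";
import path from "path";
import fs from "fs/promises";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
});
const context = await model.createContext();
const sequence = context.getSequence();

const stateFile = "state.bin";
const stateExists = await fs.stat(stateFile).then(() => true, () => false);

if (stateExists) {
    // Reuse the state saved by a previous run instead of evaluating the prompt again
    await sequence.loadStateFromFile(stateFile, {acceptRisk: true});
} else {
    // First run: evaluate the prompt tokens, then cache the resulting state
    const tokens = model.tokenize("The best way to");
    await sequence.evaluateWithoutGeneratingNewTokens(tokens);
    await sequence.saveStateToFile(stateFile);
}
```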

llama/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
@@ -4,6 +4,14 @@ if (NLC_CURRENT_PLATFORM STREQUAL "win-x64" OR NLC_CURRENT_PLATFORM STREQUAL "wi
     set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif()
 
+if (NLC_CURRENT_PLATFORM STREQUAL "win-x64")
+    if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL" CACHE STRING "" FORCE)
+    else()
+        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL" CACHE STRING "" FORCE)
+    endif()
+endif()
+
 if (NLC_TARGET_PLATFORM STREQUAL "win-arm64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
     if(NLC_CURRENT_PLATFORM STREQUAL "win-x64")
         include("./profiles/llvm.win32.host-x64.target-arm64.cmake")

llama/addon/AddonContext.cpp

Lines changed: 139 additions & 0 deletions
@@ -702,6 +702,143 @@ Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }
 
+class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
+    public:
+        AddonContext* context;
+        std::string filepath;
+        llama_seq_id sequenceId;
+        std::vector<llama_token> tokens;
+        size_t savedFileSize = 0;
+
+        AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
+            : Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
+              context(context),
+              deferred(Napi::Promise::Deferred::New(info.Env())) {
+            context->Ref();
+
+            filepath = info[0].As<Napi::String>().Utf8Value();
+            sequenceId = info[1].As<Napi::Number>().Int32Value();
+            Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
+
+            tokens.resize(inputTokens.ElementLength());
+            for (size_t i = 0; i < tokens.size(); i++) {
+                tokens[i] = inputTokens[i];
+            }
+        }
+        ~AddonContextSaveSequenceStateToFileWorker() {
+            context->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
+                if (savedFileSize == 0) {
+                    SetError("Failed to save state to file");
+                    return;
+                }
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_state_seq_save_file\"");
+            }
+        }
+        void OnOK() {
+            deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
+    worker->Queue();
+    return worker->GetPromise();
+}
+
+class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
+    public:
+        AddonContext* context;
+        std::string filepath;
+        llama_seq_id sequenceId;
+        size_t maxContextSize;
+        std::vector<llama_token> tokens;
+
+        AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
+            : Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
+              context(context),
+              deferred(Napi::Promise::Deferred::New(info.Env())) {
+            context->Ref();
+
+            filepath = info[0].As<Napi::String>().Utf8Value();
+            sequenceId = info[1].As<Napi::Number>().Int32Value();
+            maxContextSize = info[2].As<Napi::Number>().Uint32Value();
+
+            tokens.resize(maxContextSize);
+        }
+        ~AddonContextLoadSequenceStateFromFileWorker() {
+            context->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                size_t tokenCount = 0;
+                const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
+                if (fileSize == 0) {
+                    SetError("Failed to load state from file. Current context sequence size may be smaller that the state of the file");
+                    return;
+                }
+
+                tokens.resize(tokenCount);
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_state_seq_load_file\"");
+            }
+        }
+        void OnOK() {
+            size_t tokenCount = tokens.size();
+            Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
+
+            for (size_t i = 0; i < tokenCount; i++) {
+                result[i] = tokens[i];
+            }
+
+            deferred.Resolve(result);
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    AddonContextLoadSequenceStateFromFileWorker* worker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
+    worker->Queue();
+    return worker->GetPromise();
+}
+
 Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
     llama_perf_context_print(ctx);
     llama_perf_context_reset(ctx);
@@ -797,6 +934,8 @@ void AddonContext::init(Napi::Object exports) {
             InstanceMethod("setThreads", &AddonContext::SetThreads),
             InstanceMethod("printTimings", &AddonContext::PrintTimings),
             InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
+            InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
+            InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
             InstanceMethod("setLora", &AddonContext::SetLora),
             InstanceMethod("dispose", &AddonContext::Dispose),
         }
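
These two async workers are what the `saveStateToFile`/`loadStateFromFile` methods documented in the guide diffs above ultimately call into. For reference, the shape they expose to the JavaScript side, inferred from how the workers read their arguments and what they resolve with (a sketch of the inferred binding, not the library's documented public API):

```typescript
// Inferred signatures of the new addon context methods (assumption based on
// the C++ above; the real node-llama-cpp wrapper may differ).
interface AddonContextSequenceStateBindings {
    // file path, sequence id, and the tokens currently in the sequence;
    // resolves with the size (in bytes) of the written state file
    saveSequenceStateToFile(filepath: string, sequenceId: number, tokens: Uint32Array): Promise<number>;

    // file path, sequence id, and the maximum number of tokens the sequence can hold;
    // resolves with the tokens that were restored into the sequence
    loadSequenceStateFromFile(filepath: string, sequenceId: number, maxContextSize: number): Promise<Uint32Array>;
}
```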

llama/addon/AddonContext.h

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
     Napi::Value GetThreads(const Napi::CallbackInfo& info);
     Napi::Value SetThreads(const Napi::CallbackInfo& info);
 
+    Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
+    Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
+
     Napi::Value PrintTimings(const Napi::CallbackInfo& info);
     Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
