
Commit ee12b83

feat: merge upstream
2 parents: 881e2f6 + 1799127

37 files changed: +1494 −316 lines changed

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ description: Report a reproducible bug
 labels:
   - requires triage
   - bug
+title: "bug: "
+type: "Bug"
 body:
   - type: markdown
     attributes:

.github/ISSUE_TEMPLATE/documentation-issue.yml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ description: Documentation is unclear or otherwise insufficient.
 labels:
   - requires triage
   - documentation
+title: "docs: "
+type: "Documentation"
 body:
   - type: markdown
     attributes:

.github/ISSUE_TEMPLATE/feature-request.yml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ description: Suggest an new idea for this project
 labels:
   - requires triage
   - new feature
+title: "feat: "
+type: "Feature"
 body:
   - type: markdown
     attributes:

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -14,8 +14,10 @@ node_modules
 /.vitepress/.cache
 /test/.models
 /test/temp
+/test/.temp
 /temp
 /coverage
+/test-runner-profile
 
 /llama/compile_commands.json
 /llama/llama.cpp

docs/guide/awesome.md

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue";
 * [Manzoni](https://manzoni.app/) ([GitHub](https://github.com/gems-platforms/manzoni-app)) - a text editor running local LLMs
   <br /><DataBadge title="License" content="AGPL-3.0"/>
 
+* [Clippy](https://felixrieseberg.github.io/clippy/) ([GitHub](https://github.com/felixrieseberg/clippy)) - Clippy, resurrected from the 1990s, now with some AI
+  <br /><DataBadge title="License" content="MIT"/>
 
 ## Proprietary
 * [BashBuddy](https://bashbuddy.run) ([GitHub](https://github.com/wosherco/bashbuddy)) - write bash commands with natural language

docs/guide/chat-session.md

Lines changed: 81 additions & 0 deletions
@@ -446,6 +446,87 @@ console.log("AI: " + a2);
 ```
 :::
 
+:::: details Saving and restoring a context sequence evaluation state {#save-and-restore-with-context-sequence-state}
+You can also save and restore the context sequence evaluation state to avoid re-evaluating the chat history
+when you load it on a new context sequence.
+
+Please note that context sequence state files can get very large (109MB for only 1K tokens).
+Using this feature is only recommended when the chat history is very long and you plan to load it often,
+or when the evaluation is too slow due to hardware limitations.
+
+::: warning
+When loading a context sequence state from a file,
+always ensure that the model used to create the context sequence is exactly the same as the one used to save the state file.
+
+Loading a state file created from a different model can crash the process,
+thus you have to pass `{acceptRisk: true}` to the [`loadStateFromFile`](../api/classes/LlamaContextSequence.md#loadstatefromfile) method to use it.
+
+Use with caution.
+:::
+
+::: code-group
+```typescript [Save chat history and context sequence state]
+import {fileURLToPath} from "url";
+import path from "path";
+import fs from "fs/promises";
+import {getLlama, LlamaChatSession} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const contextSequence = context.getSequence();
+const session = new LlamaChatSession({contextSequence});
+
+
+const q1 = "Hi there, how are you?";
+console.log("User: " + q1);
+
+const a1 = await session.prompt(q1);
+console.log("AI: " + a1);
+
+const chatHistory = session.getChatHistory();// [!code highlight]
+await Promise.all([// [!code highlight]
+    contextSequence.saveStateToFile("state.bin"),// [!code highlight]
+    fs.writeFile("chatHistory.json", JSON.stringify(chatHistory), "utf8")// [!code highlight]
+]);// [!code highlight]
+```
+:::
+
+::: code-group
+```typescript [Restore chat history and context sequence state]
+import {fileURLToPath} from "url";
+import path from "path";
+import fs from "fs/promises";
+import {getLlama, LlamaChatSession} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// ---cut---
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const contextSequence = context.getSequence();
+const session = new LlamaChatSession({contextSequence});
+
+await contextSequence.loadStateFromFile("state.bin", {acceptRisk: true});// [!code highlight]
+const chatHistory = JSON.parse(await fs.readFile("chatHistory.json", "utf8"));// [!code highlight]
+session.setChatHistory(chatHistory);// [!code highlight]
+
+const q2 = "Summarize what you said";
+console.log("User: " + q2);
+
+const a2 = await session.prompt(q2);
+console.log("AI: " + a2);
+```
+:::
+
+::::
+
 ## Prompt Without Updating Chat History {#prompt-without-updating-chat-history}
 Prompt without saving the prompt to the chat history.
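
The restore example above assumes that both `chatHistory.json` and `state.bin` were written by an earlier run. Below is a minimal guarded-restore sketch built only on the APIs shown in this diff (`loadStateFromFile`, `setChatHistory`); the file names are reused from the example, and the fallback simply keeps an empty history so the session re-evaluates prompts normally:

```typescript
import fs from "fs/promises";
import {LlamaChatSession, LlamaContextSequence} from "node-llama-cpp";

// Sketch: restore a previously saved session if its files exist,
// otherwise return a fresh session on the same context sequence.
async function restoreSessionIfSaved(contextSequence: LlamaContextSequence) {
    const session = new LlamaChatSession({contextSequence});

    try {
        const chatHistory = JSON.parse(await fs.readFile("chatHistory.json", "utf8"));
        await contextSequence.loadStateFromFile("state.bin", {acceptRisk: true});
        session.setChatHistory(chatHistory);
    } catch {
        // Missing or unreadable files: keep the empty history and let
        // subsequent prompts be evaluated from scratch.
    }

    return session;
}
```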

docs/guide/low-level-api.md

Lines changed: 82 additions & 0 deletions
@@ -391,3 +391,85 @@ console.log(
     newTokens
 );
 ```
+
+### Save and Restore State {#save-and-restore-state}
+You can save the evaluation state of a context sequence to then later load it back.
+
+This is useful for avoiding the evaluation of tokens that you've already evaluated in the past.
+
+::: warning
+When loading a context sequence state from a file,
+always ensure that the model used to create the context sequence is exactly the same as the one used to save the state file.
+
+Loading a state file created from a different model can crash the process,
+thus you have to pass `{acceptRisk: true}` to the [`loadStateFromFile`](../api/classes/LlamaContextSequence.md#loadstatefromfile) method to use it.
+
+Use with caution.
+:::
+
+::: code-group
+```typescript [Save state]
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const sequence = context.getSequence();
+
+const input = "The best way to";
+const tokens = model.tokenize(input);
+await sequence.evaluateWithoutGeneratingNewTokens(tokens);
+
+console.log(
+    "Current state:",
+    model.detokenize(sequence.contextTokens, true),
+    sequence.contextTokens
+);
+
+await sequence.saveStateToFile("state.bin");// [!code highlight]
+```
+:::
+
+::: code-group
+```typescript [Load state]
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama, Token} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// ---cut---
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const sequence = context.getSequence();
+
+await sequence.loadStateFromFile("state.bin", {acceptRisk: true});// [!code highlight]
+
+console.log(
+    "Loaded state:",
+    model.detokenize(sequence.contextTokens, true),
+    sequence.contextTokens
+);
+
+const input = " find";
+const inputTokens = model.tokenize(input);
+const maxTokens = 10;
+const res: Token[] = [];
+for await (const token of sequence.evaluate(inputTokens)) {
+    res.push(token);
+
+    if (res.length >= maxTokens)
+        break;
+}
+
+console.log("Result:", model.detokenize(res));
+```
+:::
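
Since the point of this API is to skip re-evaluating tokens you have already evaluated, a natural pattern is to treat the state file as a cache: evaluate and save on the first run, then load on later runs. A rough sketch under the same assumptions as the examples above (same model and `state.bin` path; probing for the file via `fs.stat` is just one way to check whether it exists):

```typescript
import {fileURLToPath} from "url";
import path from "path";
import fs from "fs/promises";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "models", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
});
const context = await model.createContext();
const sequence = context.getSequence();

const stateFile = "state.bin";
const stateExists = await fs.stat(stateFile).then(() => true, () => false);

if (stateExists) {
    // Reuse the state saved by a previous run instead of evaluating the prompt again
    await sequence.loadStateFromFile(stateFile, {acceptRisk: true});
} else {
    // First run: evaluate the prompt tokens, then cache the resulting state
    const tokens = model.tokenize("The best way to");
    await sequence.evaluateWithoutGeneratingNewTokens(tokens);
    await sequence.saveStateToFile(stateFile);
}
```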

llama/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
@@ -4,6 +4,14 @@ if (NLC_CURRENT_PLATFORM STREQUAL "win-x64" OR NLC_CURRENT_PLATFORM STREQUAL "wi
     set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif()
 
+if (NLC_CURRENT_PLATFORM STREQUAL "win-x64")
+    if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL" CACHE STRING "" FORCE)
+    else()
+        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL" CACHE STRING "" FORCE)
+    endif()
+endif()
+
 if (NLC_TARGET_PLATFORM STREQUAL "win-arm64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
     if(NLC_CURRENT_PLATFORM STREQUAL "win-x64")
         include("./profiles/llvm.win32.host-x64.target-arm64.cmake")

llama/addon/AddonContext.cpp

Lines changed: 139 additions & 0 deletions
@@ -702,6 +702,143 @@ Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }
 
+class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
+    public:
+        AddonContext* context;
+        std::string filepath;
+        llama_seq_id sequenceId;
+        std::vector<llama_token> tokens;
+        size_t savedFileSize = 0;
+
+        AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
+            : Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
+              context(context),
+              deferred(Napi::Promise::Deferred::New(info.Env())) {
+            context->Ref();
+
+            filepath = info[0].As<Napi::String>().Utf8Value();
+            sequenceId = info[1].As<Napi::Number>().Int32Value();
+            Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
+
+            tokens.resize(inputTokens.ElementLength());
+            for (size_t i = 0; i < tokens.size(); i++) {
+                tokens[i] = inputTokens[i];
+            }
+        }
+        ~AddonContextSaveSequenceStateToFileWorker() {
+            context->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
+                if (savedFileSize == 0) {
+                    SetError("Failed to save state to file");
+                    return;
+                }
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_state_seq_save_file\"");
+            }
+        }
+        void OnOK() {
+            deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
+    worker->Queue();
+    return worker->GetPromise();
+}
+
+class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
+    public:
+        AddonContext* context;
+        std::string filepath;
+        llama_seq_id sequenceId;
+        size_t maxContextSize;
+        std::vector<llama_token> tokens;
+
+        AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
+            : Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
+              context(context),
+              deferred(Napi::Promise::Deferred::New(info.Env())) {
+            context->Ref();
+
+            filepath = info[0].As<Napi::String>().Utf8Value();
+            sequenceId = info[1].As<Napi::Number>().Int32Value();
+            maxContextSize = info[2].As<Napi::Number>().Uint32Value();
+
+            tokens.resize(maxContextSize);
+        }
+        ~AddonContextLoadSequenceStateFromFileWorker() {
+            context->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                size_t tokenCount = 0;
+                const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
+                if (fileSize == 0) {
+                    SetError("Failed to load state from file. Current context sequence size may be smaller that the state of the file");
+                    return;
+                }
+
+                tokens.resize(tokenCount);
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_state_seq_load_file\"");
+            }
+        }
+        void OnOK() {
+            size_t tokenCount = tokens.size();
+            Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
+
+            for (size_t i = 0; i < tokenCount; i++) {
+                result[i] = tokens[i];
+            }
+
+            deferred.Resolve(result);
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    AddonContextLoadSequenceStateFromFileWorker* worker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
+    worker->Queue();
+    return worker->GetPromise();
+}
+
 Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
     llama_perf_context_print(ctx);
     llama_perf_context_reset(ctx);
@@ -797,6 +934,8 @@ void AddonContext::init(Napi::Object exports) {
             InstanceMethod("setThreads", &AddonContext::SetThreads),
             InstanceMethod("printTimings", &AddonContext::PrintTimings),
             InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
+            InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
+            InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
             InstanceMethod("setLora", &AddonContext::SetLora),
             InstanceMethod("dispose", &AddonContext::Dispose),
         }
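
These two async workers are what the `saveStateToFile`/`loadStateFromFile` methods documented in the guide diffs above ultimately call into. For reference, the shape they expose to the JavaScript side, inferred from how the workers read their arguments and what they resolve with (a sketch of the inferred binding, not the library's documented public API):

```typescript
// Inferred signatures of the new addon context methods (assumption based on
// the C++ above; the real node-llama-cpp wrapper may differ).
interface AddonContextSequenceStateBindings {
    // file path, sequence id, and the tokens currently in the sequence;
    // resolves with the size (in bytes) of the written state file
    saveSequenceStateToFile(filepath: string, sequenceId: number, tokens: Uint32Array): Promise<number>;

    // file path, sequence id, and the maximum number of tokens the sequence can hold;
    // resolves with the tokens that were restored into the sequence
    loadSequenceStateFromFile(filepath: string, sequenceId: number, maxContextSize: number): Promise<Uint32Array>;
}
```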

llama/addon/AddonContext.h

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
     Napi::Value GetThreads(const Napi::CallbackInfo& info);
     Napi::Value SetThreads(const Napi::CallbackInfo& info);
 
+    Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
+    Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
+
     Napi::Value PrintTimings(const Napi::CallbackInfo& info);
     Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
