
Commit ea12dc5

feat: chat session response prefix (#375)
* feat: chat session response prefix
* feat: improve context shift strategy
* feat: use RAM and swap sizes in memory usage estimations
* feat(`inspect gguf` command): print a single key flag
* feat: faster building from source
* fix: Electron crash with some models on macOS when not using Metal
* fix: adapt to `llama.cpp` breaking changes
* fix: improve CPU compatibility score
1 parent (8145c94) · commit ea12dc5


49 files changed: 2,797 additions, 833 deletions

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 4 additions & 6 deletions
@@ -35,11 +35,10 @@ body:
     id: steps
     attributes:
       label: Steps to reproduce
-      description: >-
+      description: |-
         Your bug can be investigated much faster if your code can be run without any dependencies other than `node-llama-cpp`.
         Issues without reproduction steps or code examples may be closed as not actionable.
-        Please try to provide a Minimal, Complete, and Verifiable example ([link](http://stackoverflow.com/help/mcve)).
-        Please include a link to the model file you used if possible.
+        Please try to provide a Minimal, Complete, and Verifiable example ([link](http://stackoverflow.com/help/mcve)), including a link to the model file you used if possible.
         Also, please enable enable debug logs by using `getLlama({debug: true})` to get more information.
       placeholder: >-
         Please try to provide a Minimal, Complete, and Verifiable example.
@@ -50,10 +49,9 @@ body:
     id: env
     attributes:
       label: My Environment
-      description: >-
+      description: |-
        Please include the result of the command `npx --yes node-llama-cpp inspect gpu`.
-        Please also add any other relevant dependencies to this table at the end.
-        For example: Electron, Bun, Webpack.
+        Please also add any other relevant dependencies to this table at the end. For example: Electron, Bun, Webpack.
       value: |
         | Dependency | Version |
         | --- | --- |

.github/ISSUE_TEMPLATE/documentation-issue.yml

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ body:
     id: details
     attributes:
       label: What was unclear or otherwise insufficient?
-      description: >-
+      description: |-
        If relevant, please be clear about the documentation URL, as well as the location within the page.
        Add a link to the relevant documentation you're referring to.
      placeholder: >-

.github/ISSUE_TEMPLATE/feature-request.yml

Lines changed: 4 additions & 0 deletions
@@ -51,8 +51,12 @@ body:
         required: false
       - label: CUDA support
         required: false
+      - label: Vulkan support
+        required: false
       - label: Grammar
         required: false
+      - label: Function calling
+        required: false
   - type: dropdown
     id: pr
     attributes:

.github/workflows/build.yml

Lines changed: 4 additions & 1 deletion
@@ -383,7 +383,7 @@ jobs:
 
   model-dependent-tests:
     name: Model dependent tests
-    runs-on: macos-13
+    runs-on: macos-12
     env:
       NODE_LLAMA_CPP_GPU: false
     needs:
@@ -417,6 +417,9 @@ jobs:
       - name: Build binary
        run: node ./dist/cli/cli.js source build --noUsageExample
 
+      - name: Inspect hardware
+        run: node ./dist/cli/cli.js inspect gpu
+
       - name: Cache models
        id: cache-test-models
        uses: actions/cache@v4

.vitepress/config.ts

Lines changed: 30 additions & 4 deletions
@@ -34,7 +34,8 @@ const packageVersion = env.get("DOCS_PACKAGE_VERSION")
     .default(packageJson.version)
     .asString();
 
-const hostname = "https://node-llama-cpp.withcat.ai/";
+const hostname = "https://node-llama-cpp.withcat.ai/"
+const buildDate = new Date();
 
 const socialPosterLink = hostname + "social.poster.jpg";
 const defaultPageTitle = "node-llama-cpp - node.js bindings for llama.cpp";
@@ -90,7 +91,7 @@ export default defineConfig({
     base: urlBase,
     sitemap: {
         hostname,
-        transformItems(items) {
+        async transformItems(items) {
             function priorityMatch(a: {url: string}, b: {url: string}, matchers: ((url: string) => boolean)[]): number {
                 for (const matcher of matchers) {
                     const aMatch = matcher(a.url);
@@ -105,13 +106,38 @@ export default defineConfig({
                 return 0;
             }
 
+            const blogPosts = await createContentLoader("blog/*.md", {
+                excerpt: true,
+                render: true
+            })
+                .load();
+            const blogPostMap = new Map<string, typeof blogPosts[number]>();
+            for (const blogPost of blogPosts) {
+                let url = blogPost.url;
+                if (url.startsWith("/"))
+                    url = url.slice("/".length);
+
+                blogPostMap.set(url, blogPost);
+            }
+
             return items
                 .map((item) => {
-                    if (item.url.startsWith("api/") || item.url.startsWith("cli/")) {
+                    if (item.url === "" || item.url === "blog/") {
+                        item.lastmod = new Date(buildDate);
+                    } else if (item.url.startsWith("api/") || item.url.startsWith("cli/")) {
                         item = {
                             ...item,
-                            lastmod: undefined
+                            lastmod: new Date(buildDate)
                         };
+                    } else if (item.lastmod == null && item.url.startsWith("blog/")) {
+                        const postDate = blogPostMap.get(item.url)?.frontmatter.date;
+                        if (postDate != null) {
+                            const parsedDate = new Date(postDate);
+                            if (Number.isFinite(parsedDate.getTime()))
+                                item.lastmod = parsedDate;
+                        }
+                    } else if (item.lastmod == null) {
+                        item.lastmod = new Date(buildDate);
                     }
 
                     return item;

README.md

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 <div align="center">
-    <img alt="node-llama-cpp Logo" src="https://raw.githubusercontent.com/withcatai/node-llama-cpp/master/assets/logo.v3.roundEdges.avif" width="360px" />
+    <a href="https://node-llama-cpp.withcat.ai" target="_blank"><img alt="node-llama-cpp Logo" src="https://raw.githubusercontent.com/withcatai/node-llama-cpp/master/assets/logo.v3.roundEdges.avif" width="360px" /></a>
     <h1>node-llama-cpp</h1>
     <p>Run AI models locally on your machine</p>
     <sub>Pre-built bindings are provided with a fallback to building from source with cmake</sub>

docs/guide/chat-session.md

Lines changed: 31 additions & 0 deletions
@@ -671,3 +671,34 @@ await new Promise(resolve => setTimeout(resolve, 1500));
 const cachedCompletion = completionEngine.complete("Hi there! How");
 console.log("Cached completion:", cachedCompletion);
 ```
+
+## Response Prefix {#response-prefix}
+You can force the model response to start with a specific prefix,
+to make the model follow a certain direction in its response.
+
+```typescript
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama, LlamaChatSession, GeneralChatWrapper} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: path.join(__dirname, "models", "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
+});
+const context = await model.createContext();
+const session = new LlamaChatSession({
+    contextSequence: context.getSequence(),
+    chatWrapper: new GeneralChatWrapper()
+});
+
+
+const q1 = "Hi there, how are you?";
+console.log("User: " + q1);
+
+const a1 = await session.prompt(q1, {
+    responsePrefix: "The weather today is"
+});
+console.log("AI: " + a1);
+```

docs/guide/electron.md

Lines changed: 24 additions & 0 deletions
@@ -37,3 +37,27 @@ so that `node-llama-cpp` can find them.
 Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to you machine when your run `npm install`.
 
 Packaging an `arm64` app on an `x64` machine is supported, but packaging an `x64` app on an `arm64` machine is not.
+
+## Bundling
+When bundling your code for Electron using [Electron Vite](https://electron-vite.org) or Webpack,
+ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module.
+
+Marking `node-llama-cpp` as an external module will prevent its code from being bundled with your application code,
+and instead, it'll be loaded from the `node_modules` directory at runtime (which should be packed into a `.asar` archive).
+
+The file structure of `node-llama-cpp` is crucial for it to function correctly,
+so bundling it will break its functionality.
+Moreover, since `node-llama-cpp` includes prebuilt binaries (and also local builds from source),
+those files must be retained in their original structure for it to work.
+
+Electron has [its own bundling solution called ASAR](https://www.electronjs.org/docs/latest/tutorial/asar-archives) that is designed to work with node modules.
+ASAR retains the original file structure of node modules by packing all the files into a single `.asar` archive file that Electron will read from at runtime like it would from the file system.
+This method ensures node modules work as intended in Electron applications, even though they are bundled into a single file.
+
+Using ASAR is the recommended way to bundle `node-llama-cpp` in your Electron app.
+
+If you're using the scaffolded Electron app, this is already taken care of.
+
+::: tip NOTE
+We recommend using [Electron Vite](https://electron-vite.org) over Webpack for your Electron app due to to Vite's speed and Webpack's lack of proper ESM support in the output bundle, which complicates the bundling process.
+:::
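
As an illustration of the external-module setup described in the added guide above (not part of this commit), a minimal Electron Vite config sketch could look like the following; it assumes a standard `electron-vite` project layout and uses Rollup's `external` option, so adapt it to your own setup:

```typescript
// electron.vite.config.ts — illustrative sketch only, not taken from this commit.
// It keeps node-llama-cpp out of the main-process bundle so it is loaded
// from node_modules (packed into the .asar archive) at runtime.
import {defineConfig} from "electron-vite";

export default defineConfig({
    main: {
        build: {
            rollupOptions: {
                // treat node-llama-cpp as an external module instead of bundling it
                external: ["node-llama-cpp"]
            }
        }
    }
});
```

Webpack users can achieve the same effect with its `externals` option.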

docs/guide/tips-and-tricks.md

Lines changed: 34 additions & 0 deletions
@@ -85,3 +85,37 @@ npx --no node-llama-cpp source download
 ```
 
 Now, just use `node-llama-cpp` as you normally would.
+
+## Intel AMX {#intel-amx}
+> Intel AMX (Advanced Matrix Extensions) is a dedicated hardware block found on Intel Xeon processors
+> that helps optimize and accelerate matrix multiplication operations.
+>
+> It's available on the 4th Gen and newer Intel Xeon processors.
+
+Intel AMX can improve CPU inference performance [by 2x and up to even 14x](https://github.com/ggerganov/llama.cpp/pull/7707) faster inference times on supported CPUs (on specific conditions).
+
+If you're using a 4th Gen or newer Intel Xeon processor,
+you might want to [build `llama.cpp` from source](./building-from-source.md) to utilize these hardware-specific optimizations available on your hardware.
+
+To do this, run this command inside your project on the machine you run your project on:
+```shell
+npx --no node-llama-cpp source download
+```
+
+Alternatively, you can force `node-llama-cpp` to not use its prebuilt binaries
+and instead build from source when calling [`getLlama`](../api/functions/getLlama.md) for the first time on a Xeon CPU:
+
+```typescript
+import os from "os";
+import {getLlama} from "node-llama-cpp";
+
+const llama = await getLlama({
+    usePrebuiltBinaries: !os.cpus().some((cpu) => (
+        cpu.model.toLowerCase().includes("Xeon".toLowerCase())
+    ))
+});
+```
+::: info NOTE
+Building from source can take some time (when using CUDA even up to an hour in extreme cases),
+so ensure you dedicate some time for this as part of the deployment process.
+:::

llama/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
@@ -22,6 +22,12 @@ execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
                 OUTPUT_VARIABLE NODE_ADDON_API_DIR
                 OUTPUT_STRIP_TRAILING_WHITESPACE)
 
+set(LLAMA_BUILD_COMMON ON)
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    add_compile_options(-Wno-c++17-extensions)
+endif()
+
 include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})
 
 add_subdirectory("llama.cpp")
