Commit 5a70576

feat: use the best compute layer available by default (#175)

* feat: detect the available compute layers on the system and use the best one by default
* feat: more guardrails to not load an incompatible prebuilt binary, to prevent process crashes due to linux distro differences
* feat: improve logs as to why system-related issues occur and how to fix them
* feat: `inspect` command
* feat: add `GemmaChatWrapper`
* feat: `TemplateChatWrapper` - easier method to create simple chat wrappers, see the type docs for more info
* fix: adapt to `llama.cpp` breaking change
* fix: when a specific compute layer is requested, fail the build if it is not found
* fix: return user-defined llama tokens
* docs: update more docs to prepare for version 3.0
1 parent bcaab4f commit 5a70576

59 files changed, +3327 -434 lines
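
The headline change is automatic compute-layer selection. As a quick orientation before the diffs, here is a minimal sketch of how that surfaces in the API — the `getLlama()` and `gpu: "vulkan"` calls are taken from the Vulkan guide added in this commit, and the comments summarize the commit message:

```typescript
import {getLlama} from "node-llama-cpp";

// With no options, the compute layers available on the system are detected
// and the best one is used by default.
const llama = await getLlama();

// When a specific compute layer is requested and it is not available,
// loading now fails instead of silently falling back.
const vulkanLlama = await getLlama({
    gpu: "vulkan"
});
```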

.config/typedoc.json

Lines changed: 4 additions & 0 deletions
@@ -18,7 +18,11 @@
     "preserveAnchorCasing": true,
     "useCodeBlocks": true,
     "expandObjects": true,
+    "expandParameters": true,
     "parametersFormat": "table",
+    "propertiesFormat": "list",
+    "enumMembersFormat": "table",
+    "typeDeclarationFormat": "list",
     "hideInPageTOC": true,
     "docsRoot": "../docs"
 }

.github/workflows/build.yml

Lines changed: 12 additions & 12 deletions
@@ -198,19 +198,19 @@ jobs:
 
     // build binaries
     if (process.env.ARTIFACT_NAME === "win") {
-        await buildBinary("x64");
-        await buildBinary("x64", ["--cuda"]);
-        await buildBinary("x64", ["--vulkan"]);
-        // await buildBinary("arm64", [], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work
+        await buildBinary("x64", ["--gpu", "false"]);
+        await buildBinary("x64", ["--gpu", "cuda"]);
+        await buildBinary("x64", ["--gpu", "vulkan"]);
+        // await buildBinary("arm64", ["--gpu", "false"], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work
     } else if (process.env.ARTIFACT_NAME === "linux") {
-        await buildBinary("x64");
-        await buildBinary("x64", ["--cuda"]);
-        await buildBinary("x64", ["--vulkan"]);
-        await buildBinary("arm64");
-        await buildBinary("armv7l");
+        await buildBinary("x64", ["--gpu", "false"]);
+        await buildBinary("x64", ["--gpu", "cuda"]);
+        await buildBinary("x64", ["--gpu", "vulkan"]);
+        await buildBinary("arm64", ["--gpu", "false"]);
+        await buildBinary("armv7l", ["--gpu", "false"]);
     } else if (process.env.ARTIFACT_NAME === "mac") {
-        await buildBinary("arm64", ["--metal"]);
-        await buildBinary("x64", ["--no-metal"]);
+        await buildBinary("arm64", ["--gpu", "metal"]);
+        await buildBinary("x64", ["--gpu", "false"]);
     }
 
     // move binaries to llamaBins
@@ -275,7 +275,7 @@ jobs:
     name: Model dependent tests
     runs-on: macos-14
     env:
-      NODE_LLAMA_CPP_METAL: false
+      NODE_LLAMA_CPP_GPU: false
     needs:
       - build
     steps:

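The workflow change above reflects the new unified CLI convention: the per-backend flags (`--cuda`, `--vulkan`, `--metal`, `--no-metal`) are replaced by a single `--gpu` option, and the test environment variable is renamed from `NODE_LLAMA_CPP_METAL` to `NODE_LLAMA_CPP_GPU`. For reference, the same option used locally — the first command appears verbatim in the Vulkan guide added below, while the `--gpu false` variant is an assumption based on the values passed to `buildBinary()` in this workflow:

```bash
# build llama.cpp with Vulkan support (from the new Vulkan guide)
npx --no node-llama-cpp download --gpu vulkan

# presumably builds a CPU-only binary, mirroring the CI's ["--gpu", "false"] arguments
npx --no node-llama-cpp download --gpu false
```
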
.vitepress/config.ts

Lines changed: 45 additions & 6 deletions
@@ -23,6 +23,19 @@ const chatWrappersOrder = [
     "FalconChatWrapper"
 ] as const;
 
+const categoryOrder = [
+    "Functions",
+    "Classes",
+    "Types",
+    "Enums"
+] as const;
+
+const functionsOrder = [
+    "getLlama",
+    "defineChatSessionFunction",
+    "LlamaText"
+] as const;
+
 function resolveHref(href: string) {
     if (urlBase == null)
         return href;
@@ -149,7 +162,9 @@ export default defineConfig({
     items: [
         {text: "Building from source", link: "/building-from-source"},
         {text: "Metal support", link: "/Metal"},
-        {text: "CUDA support", link: "/CUDA"}
+        {text: "CUDA support", link: "/CUDA"},
+        {text: "Vulkan support", link: "/vulkan"},
+        {text: "Troubleshooting", link: "/troubleshooting"}
     ]
 }, {
     text: "Contributing",
@@ -166,6 +181,9 @@ export default defineConfig({
     items: [
         {text: "Chat", link: "/chat"},
         {text: "Download", link: "/download"},
+        {text: "Complete", link: "/complete"},
+        {text: "Infill", link: "/infill"},
+        {text: "Inspect", link: "/inspect"},
         {text: "Build", link: "/build"},
         {text: "Clear", link: "/clear"}
     ]
@@ -184,6 +202,7 @@ function getApiReferenceSidebar(): typeof typedocSidebar {
         case "README":
         case "API":
             return null;
+
         case "Classes":
         case "Type Aliases":
         case "Functions":
@@ -200,7 +219,23 @@
                 delete subItem.collapsed;
 
                 return subItem;
-            })
+            });
+
+            return item;
+
+        case "Enumerations":
+            item.text = "Enums";
+
+            if (item.collapsed)
+                item.collapsed = false;
+            return item;
+
+        case "Variables":
+            item.text = "Enums";
+
+            if (item.collapsed)
+                item.collapsed = false;
+
             return item;
     }
 
@@ -214,6 +249,8 @@ function orderApiReferenceSidebar(sidebar: typeof typedocSidebar): typeof typedo
     orderTypes(sidebar);
     orderFunctions(sidebar);
 
+    sortItemsInOrder(sidebar, categoryOrder);
+
     return sidebar;
 }
 
@@ -345,18 +382,20 @@ function orderTypes(sidebar: typeof typedocSidebar) {
 }
 
 function orderFunctions(sidebar: typeof typedocSidebar) {
-    const types = sidebar.find((item) => item.text === "Functions");
+    const functions = sidebar.find((item) => item.text === "Functions");
 
-    if (types == null || !(types.items instanceof Array))
+    if (functions == null || !(functions.items instanceof Array))
         return;
 
     groupItems(
-        types.items,
+        functions.items,
         (item) => item.text === "LlamaText",
         (item) => item.text != null && ["isLlamaText", "tokenizeText"].includes(item.text)
     );
 
-    moveCollapseItemsToTheEnd(types.items);
+    sortItemsInOrder(functions.items, functionsOrder);
+
+    moveCollapseItemsToTheEnd(functions.items);
 }
 

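The `sortItemsInOrder` helper called in the diff above is not part of this commit's changes, so its implementation is not shown here. A hypothetical sketch of what such a helper might look like, assuming it only moves the items named in the `order` array to the front of the list:

```typescript
// Hypothetical sketch — not the actual implementation from the repository.
// Items whose `text` appears in `order` are moved to the front, in that order;
// all other items keep their relative positions (Array#sort is stable).
function sortItemsInOrder<T extends {text?: string}>(items: T[], order: readonly string[]): void {
    items.sort((a, b) => {
        const aIndex = a.text != null ? order.indexOf(a.text) : -1;
        const bIndex = b.text != null ? order.indexOf(b.text) : -1;

        if (aIndex < 0 && bIndex < 0)
            return 0;
        else if (aIndex < 0)
            return 1;
        else if (bIndex < 0)
            return -1;

        return aIndex - bIndex;
    });
}
```
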
.vitepress/utils/getCommandHtmlDoc.ts

Lines changed: 2 additions & 2 deletions
@@ -161,11 +161,11 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}
     }
 
     if (option.type != null) {
-        optionDescription.push(`(<code>${htmlEscape(option.type)}</code>)`);
+        optionDescription.push(`<code><span style="opacity: 0.4">(</span>${htmlEscape(option.type)}<span style="opacity: 0.4">)</span></code>`);
     }
 
     if (option.demandOption) {
-        optionDescription.push(`(<code>${htmlEscape("required")}</code>)`);
+        optionDescription.push(`<code><span style="opacity: 0.4">(</span>${htmlEscape("required")}<span style="opacity: 0.4">)</span></code>`);
     }
 
     if (option.choices != null) {

docs/guide/cli/cli.data.ts

Lines changed: 3 additions & 0 deletions
@@ -4,6 +4,7 @@ import {BuildCommand} from "../../../src/cli/commands/BuildCommand.js";
 import {ChatCommand} from "../../../src/cli/commands/ChatCommand.js";
 import {CompleteCommand} from "../../../src/cli/commands/CompleteCommand.js";
 import {InfillCommand} from "../../../src/cli/commands/InfillCommand.js";
+import {InspectCommand} from "../../../src/cli/commands/InspectCommand.js";
 import {DownloadCommand} from "../../../src/cli/commands/DownloadCommand.js";
 import {ClearCommand} from "../../../src/cli/commands/ClearCommand.js";
 import {htmlEscape} from "../../../.vitepress/utils/htmlEscape.js";
@@ -21,6 +22,7 @@ export default {
     ["chat", ChatCommand],
     ["complete", CompleteCommand],
     ["infill", InfillCommand],
+    ["inspect", InspectCommand],
     ["download", DownloadCommand],
     ["build", BuildCommand],
     ["clear", ClearCommand]
@@ -29,6 +31,7 @@ export default {
     chat: await getCommandHtmlDoc(ChatCommand),
     complete: await getCommandHtmlDoc(CompleteCommand),
     infill: await getCommandHtmlDoc(InfillCommand),
+    inspect: await getCommandHtmlDoc(InspectCommand),
     download: await getCommandHtmlDoc(DownloadCommand),
     build: await getCommandHtmlDoc(BuildCommand),
     clear: await getCommandHtmlDoc(ClearCommand)

docs/guide/cli/inspect.md

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+---
+outline: deep
+---
+# `inspect` command
+
+<script setup lang="ts">
+import {data as docs} from "./cli.data.js";
+const commandDoc = docs.inspect;
+</script>
+
+{{commandDoc.description}}
+
+## Usage
+```shell-vue
+{{commandDoc.usage}}
+```
+<div v-html="commandDoc.options"></div>

docs/guide/troubleshooting.md

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+---
+outline: [2, 3]
+---
+# Troubleshooting
+## ESM usage
+`node-llama-cpp` is an [ES module](https://nodejs.org/api/esm.html#modules-ecmascript-modules), so can only use `import` to load it and cannot use [`require`](https://nodejs.org/docs/latest-v18.x/api/esm.html#require:~:text=Using%20require%20to%20load%20an%20ES%20module%20is%20not%20supported%20because%20ES%20modules%20have%20asynchronous%20execution.%20Instead%2C%20use%20import()%20to%20load%20an%20ES%20module%20from%20a%20CommonJS%20module.).
+
+Since the Node.js ecosystem is transitioning to ESM, it's recommended to use it in your project.
+
+To do so, make sure your `package.json` file has `"type": "module"` in it.
+
+### Using in CommonJS
+If you cannot use ESM in your project, you can still use the `import` function from a CommonJS module to load `node-llama-cpp`:
+```typescript
+async function myLogic() {
+    const {getLlama} = await import("node-llama-cpp");
+}
+
+myLogic();
+```
+
+If your `tsconfig.json` is configured to transpile `import` statements into `require` function calls automatically,
+you can use this workaround to `import` `node-llama-cpp`:
+```typescript
+async function myLogic() {
+    const {getLlama} = await Function('return import("node-llama-cpp")')();
+}
+
+myLogic();
+```
+
+## Running in Termux
+In Termux, the prebuilt binaries cannot be used due to the custom linker used by it.
+
+To allow `node-llama-cpp` to build the binaries, install the required packages first:
+```bash
+pkg update
+pkg install nodejs git cmake clang libxml2
+```
+
+For Vulkan support, also install the following packages:
+```bash
+pkg install vulkan-tools vulkan-loader-android vulkan-headers vulkan-extension-layer
+```
+> Note that your device GPU may not support the required capabilities that `llama.cpp` requires, so it may not work.
+>
+> If that happens, disable Vulkan in your code or uninstall the Vulkan packages.

docs/guide/vulkan.md

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+# Using Vulkan
+> Vulkan is a low-overhead, cross-platform 3D graphics and computing API
+
+`node-llama-cpp` ships with prebuilt binaries with Vulkan support for Windows and Linux, and these are automatically used when Vulkan support is detected on your machine.
+
+**Windows:** Vulkan drivers are usually provided together with your GPU drivers, so most chances are that you don't have to install anything.
+
+**Linux:** you have to [install the Vulkan SDK](#vulkan-sdk-ubuntu).
+
+## Testing Vulkan support
+To check whether the Vulkan support works on your machine, run this command:
+```bash
+npx --no node-llama-cpp inspect gpu
+```
+
+You should see an output like this:
+```ansi
+Vulkan: available
+
+Vulkan used VRAM: 0% (64KB/21.33GB)
+Vulkan free VRAM: 99.99% (21.33GB/21.33GB)
+
+Used RAM: 97.37% (31.16GB/32GB)
+Free RAM: 2.62% (860.72MB/32GB)
+```
+
+If you see `Vulkan used VRAM` in the output, it means that Vulkan support is working on your machine.
+
+## Building `node-llama-cpp` with Vulkan support
+### Prerequisites
+* [`cmake-js` dependencies](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake)
+* [CMake](https://cmake.org/download/) 3.26 or higher (optional, recommended if you have build issues)
+* <a id="vulkan-sdk" />[Vulkan SDK](https://vulkan.lunarg.com/sdk/home):
+>
+#### Windows: [Vulkan SDK installer](https://sdk.lunarg.com/sdk/download/latest/windows/vulkan-sdk.exe) {#vulkan-sdk-windows}
+>
+#### Ubuntu {#vulkan-sdk-ubuntu}
+::: code-group
+
+```bash [Ubuntu 22.04]
+wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
+sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+sudo apt update
+sudo apt install vulkan-sdk
+```
+
+```bash [Ubuntu 20.04]
+wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list https://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
+sudo apt update
+sudo apt install vulkan-sdk
+```
+
+:::
+
+## Building from source
+When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source.
+
+Manually building from source using the [`download`](./cli/download) command is recommended for troubleshooting build issues.
+
+To manually build from source, run this command inside of your project:
+```bash
+npx --no node-llama-cpp download --gpu vulkan
+```
+
+> If `cmake` is not installed on your machine, `node-llama-cpp` will automatically download `cmake` to an internal directory and try to use it to build `llama.cpp` from source.
+
+> If you see the message `Vulkan not found` during the build process,
+> it means that the Vulkan SDK is not installed on your machine or that it is not detected by the build process.
+
+## Using `node-llama-cpp` with Vulkan
+It's recommended to use [`getLlama`](../api/functions/getLlama) without specifying a GPU type, so it'll detect the available GPU types and use the best one automatically.
+
+To do this, just use [`getLlama`](../api/functions/getLlama) without any parameters:
+```typescript
+import {getLlama} from "node-llama-cpp";
+
+const llama = await getLlama();
+```
+
+To force it to use Vulkan, you can use the [`gpu`](../api/type-aliases/LlamaOptions#gpu) option:
+```typescript
+import {getLlama} from "node-llama-cpp";
+
+const llama = await getLlama({
+    gpu: "vulkan"
+});
+```
+To configure how much layers of the model are run on the GPU, configure `gpuLayers` on `LlamaModel` in your code:
+```typescript
+const model = new LlamaModel({
+    llama,
+    modelPath,
+    gpuLayers: 64 // or any other number of layers you want
+});
+```
+
+You'll see logs like these in the console when the model loads:
+```
+llm_load_tensors: ggml ctx size = 0.09 MB
+llm_load_tensors: mem required = 41.11 MB (+ 2048.00 MB per state)
+llm_load_tensors: offloading 32 repeating layers to GPU
+llm_load_tensors: offloading non-repeating layers to GPU
+llm_load_tensors: offloading v cache to GPU
+llm_load_tensors: offloading k cache to GPU
+llm_load_tensors: offloaded 35/35 layers to GPU
+llm_load_tensors: VRAM used: 4741 MB
+```
+
+On Linux, you can monitor GPU usage with this command:
+```bash
+watch -d "npx --no node-llama-cpp inspect gpu"
+```

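To close, here are the fragments from the Vulkan guide above stitched into one self-contained sketch. The `getLlama`/`LlamaModel` usage and the `gpuLayers` option follow the snippets shown in the guide; the `LlamaModel` import and the model path are assumptions for illustration:

```typescript
import {getLlama, LlamaModel} from "node-llama-cpp";

// Force the Vulkan compute layer (omit the option to auto-detect the best one).
const llama = await getLlama({
    gpu: "vulkan"
});

// Load a model and offload layers to the GPU, as shown in the guide above.
// "path/to/model.gguf" is a placeholder — point it at a local GGUF model file.
const model = new LlamaModel({
    llama,
    modelPath: "path/to/model.gguf",
    gpuLayers: 64
});
```
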