# `node-llama-cpp` CUDA support
## Prerequisites
* [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) 12.0 or higher
* [`cmake-js` dependencies](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake)
* [CMake](https://cmake.org/download/) 3.26 or higher (optional, recommended if you have build issues)

## Building `node-llama-cpp` with CUDA support
Run this command inside of your project:
```bash
npx --no node-llama-cpp download --cuda
```

> If `cmake` is not installed on your machine, `node-llama-cpp` will automatically download `cmake` to an internal directory and try to use it to build `llama.cpp` from source.

> If you see the message `cuBLAS not found` during the build process,
> it means that CUDA Toolkit is not installed on your machine or that it is not detected by the build process.

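One quick way to check whether the CUDA Toolkit is visible to the build is to verify that the `nvcc` compiler is available on your `PATH` and reports the expected version (a general diagnostic step, not something specific to `node-llama-cpp`):
```bash
nvcc --version
```
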
### Custom `llama.cpp` cmake options
`llama.cpp` has some options you can use to customize your CUDA build; you can find them [here](https://github.com/ggerganov/llama.cpp/tree/master#cublas).

To build `node-llama-cpp` with any of these options, set an environment variable named after the option, prefixed with `NODE_LLAMA_CPP_CMAKE_OPTION_`.

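For example, a minimal sketch on Linux, assuming you want to enable llama.cpp's `LLAMA_CUDA_F16` option (check the list linked above for the options your `llama.cpp` version actually supports):
```bash
# Assumption: the part of the variable name after the NODE_LLAMA_CPP_CMAKE_OPTION_
# prefix is forwarded to the llama.cpp cmake build as a cmake option (here, LLAMA_CUDA_F16=1)
export NODE_LLAMA_CPP_CMAKE_OPTION_LLAMA_CUDA_F16=1
npx --no node-llama-cpp download --cuda
```
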
### Fix the `Failed to detect a default CUDA architecture` build error
To fix this issue, set the `CUDACXX` environment variable to the path of the `nvcc` compiler.

For example, if you installed CUDA Toolkit 12.2 on Windows, run the following command:
```cmd
set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe
```

On Linux, it would be something like this:
```bash
export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
```

Then run the build command again to check whether setting the `CUDACXX` environment variable fixed the issue.

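For example, on Linux the full sequence might look like this (using the same build command as above; adjust the path to match your installed CUDA version):
```bash
export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
npx --no node-llama-cpp download --cuda
```
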
## Using `node-llama-cpp` with CUDA
After you build `node-llama-cpp` with CUDA support, you can use it normally.

To configure how many layers of the model are run on the GPU, set `gpuLayers` on `LlamaModel` in your code:
```typescript
import {LlamaModel} from "node-llama-cpp";

const modelPath = "path/to/your/model.gguf"; // placeholder: path to a local GGUF model file

const model = new LlamaModel({
    modelPath,
    gpuLayers: 64 // or any other number of layers you want
});
```

You'll see logs like these in the console when the model loads:
```
llm_load_tensors: ggml ctx size = 0.09 MB
llm_load_tensors: using CUDA for GPU acceleration
llm_load_tensors: mem required = 41.11 MB (+ 2048.00 MB per state)
llm_load_tensors: offloading 32 repeating layers to GPU
llm_load_tensors: offloading non-repeating layers to GPU
llm_load_tensors: offloading v cache to GPU
llm_load_tensors: offloading k cache to GPU
llm_load_tensors: offloaded 35/35 layers to GPU
llm_load_tensors: VRAM used: 4741 MB
```

On Linux, you can monitor GPU usage with this command:
```bash
watch -d nvidia-smi
```