Commit b10999d
fix: CUDA 13 support (#494)
fix: prebuilt binaries CUDA 13 support
1 parent 12749c0 commit b10999d

9 files changed: +188 additions, -33 deletions

.github/workflows/build.yml

Lines changed: 93 additions & 18 deletions
@@ -56,15 +56,18 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - name: "Windows for x64"
+          - name: "Windows (1)"
             os: windows-2022
-            artifact: "win-x64"
-          - name: "Windows for Arm"
+            artifact: "win-1"
+          - name: "Windows (2)"
             os: windows-2022
-            artifact: "win-arm"
-          - name: "Ubuntu"
+            artifact: "win-2"
+          - name: "Ubuntu (1)"
+            os: ubuntu-22.04
+            artifact: "linux-1"
+          - name: "Ubuntu (2)"
             os: ubuntu-22.04
-            artifact: "linux"
+            artifact: "linux-2"
           - name: "macOS x64"
             os: macos-13
             artifact: "mac-x64"
@@ -97,8 +100,8 @@ jobs:
           choco install cmake --version=3.31.1
           choco install ninja
 
-      - name: Install dependencies on Ubuntu
-        if: matrix.config.name == 'Ubuntu'
+      - name: Install dependencies on Ubuntu (1)
+        if: matrix.config.name == 'Ubuntu (1)'
         run: |
           sudo apt-get update
           sudo apt-get install ninja-build cmake libtbb-dev g++-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf
@@ -108,25 +111,66 @@ jobs:
 
           which arm-linux-gnueabihf-gcc
           which arm-linux-gnueabihf-g++
+
+          cmake --version
+
+      - name: Install dependencies on Ubuntu (2)
+        if: matrix.config.name == 'Ubuntu (2)'
+        run: |
+          sudo apt-get update
+          sudo apt-get install ninja-build libtbb-dev
+
+          wget -c https://github.com/Kitware/CMake/releases/download/v3.31.7/cmake-3.31.7-linux-x86_64.tar.gz
+          sudo tar --strip-components=1 -C /usr/local -xzf cmake-3.31.7-linux-x86_64.tar.gz
+          rm -f ./cmake-3.31.7-linux-x86_64.tar.gz
+
+          cmake --version
 
-      - name: Install Cuda on Windows for x64
-        if: matrix.config.name == 'Windows for x64'
+      - name: Install Cuda 12.4 on Windows (1)
+        if: matrix.config.name == 'Windows (1)'
         uses: Jimver/[email protected]
         with:
           cuda: '12.4.0'
           method: 'network'
           sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
           use-local-cache: false
 
-      - name: Install Cuda on Ubuntu
-        if: matrix.config.name == 'Ubuntu'
+      - name: Install Cuda 13.0 on Windows (2)
+        if: matrix.config.name == 'Windows (2)'
+        shell: bash
+        timeout-minutes: 30
+        run: |
+          curl -Lo cuda_13.0.0_windows_network.exe https://developer.download.nvidia.com/compute/cuda/13.0.0/network_installers/cuda_13.0.0_windows_network.exe
+
+          echo "Installing Cuda 13.0.0"
+          powershell -Command "Start-Process -FilePath cuda_13.0.0_windows_network.exe -ArgumentList '-s','-n' -Wait"
+          echo "Cuda installation finished"
+
+          rm -f ./cuda_13.0.0_windows_network.exe
+
+          echo "where cudart64_13.dll: $(where cudart64_13.dll)"
+
+          echo "CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v13.0" >> $GITHUB_ENV
+          echo "CUDA_PATH_V13_0=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v13.0" >> $GITHUB_ENV
+          echo "CUDA_PATH_VX_Y=CUDA_PATH_V13_0" >> $GITHUB_ENV
+          echo "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v13.0\\bin" >> $GITHUB_PATH
+
+      - name: Install Cuda 12.4 on Ubuntu
+        if: matrix.config.name == 'Ubuntu (1)'
         uses: Jimver/[email protected]
         with:
           cuda: '12.4.0'
           method: 'network'
 
-      - name: Install Vulkan SDK on Windows for x64
-        if: matrix.config.name == 'Windows for x64'
+      - name: Install Cuda 13.0 on Ubuntu
+        if: matrix.config.name == 'Ubuntu (2)'
+        uses: Jimver/[email protected]
+        with:
+          cuda: '13.0.0'
+          method: 'network'
+
+      - name: Install Vulkan SDK on Windows (1)
+        if: matrix.config.name == 'Windows (1)'
         shell: powershell
         env:
           VULKAN_VERSION: 1.3.261.1
@@ -137,7 +181,7 @@ jobs:
           Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
 
       - name: Install Vulkan SDK on Ubuntu
-        if: matrix.config.name == 'Ubuntu'
+        if: matrix.config.name == 'Ubuntu (1)'
         run: |
           wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
           sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
@@ -205,18 +249,21 @@ jobs:
           }
 
           // build binaries
-          if (process.env.ARTIFACT_NAME === "win-x64") {
+          if (process.env.ARTIFACT_NAME === "win-1") {
               await buildBinary("x64", ["--gpu", "false"]);
               await buildBinary("x64", ["--gpu", "cuda"]);
               await buildBinary("x64", ["--gpu", "vulkan"]);
-          } else if (process.env.ARTIFACT_NAME === "win-arm") {
+          } else if (process.env.ARTIFACT_NAME === "win-2") {
               await buildBinary("arm64", ["--gpu", "false"], windowsOnArmNodeVersion);
-          } else if (process.env.ARTIFACT_NAME === "linux") {
+              await buildBinary("x64", ["--gpu", "cuda"]);
+          } else if (process.env.ARTIFACT_NAME === "linux-1") {
               await buildBinary("x64", ["--gpu", "false"]);
               await buildBinary("x64", ["--gpu", "cuda"]);
               await buildBinary("x64", ["--gpu", "vulkan"]);
               await buildBinary("arm64", ["--gpu", "false"]);
               await buildBinary("armv7l", ["--gpu", "false"]);
+          } else if (process.env.ARTIFACT_NAME === "linux-2") {
+              await buildBinary("x64", ["--gpu", "cuda"]);
           } else if (process.env.ARTIFACT_NAME === "mac-x64") {
               await buildBinary("x64", ["--gpu", "false"]);
           } else if (process.env.ARTIFACT_NAME === "mac-arm64") {
@@ -233,6 +280,26 @@ jobs:
               );
           }
 
+          if (process.env.ARTIFACT_NAME === "win-2") {
+              await fs.move(
+                  path.join(llamaBinsDirectoryPath, "win-x64-cuda"),
+                  path.join(llamaBinsDirectoryPath, "win-x64-cuda-2")
+              );
+
+              if (!(await fs.pathExists(path.join(llamaBinsDirectoryPath, "win-x64-cuda-2", "ggml-cuda.dll")))) {
+                  throw new Error("ggml-cuda.dll not found in win-x64-cuda-2");
+              }
+          } else if (process.env.ARTIFACT_NAME === "linux-2") {
+              await fs.move(
+                  path.join(llamaBinsDirectoryPath, "linux-x64-cuda"),
+                  path.join(llamaBinsDirectoryPath, "linux-x64-cuda-2")
+              );
+
+              if (!(await fs.pathExists(path.join(llamaBinsDirectoryPath, "linux-x64-cuda-2", "libggml-cuda.so")))) {
+                  throw new Error("libggml-cuda.so not found in linux-x64-cuda-2");
+              }
+          }
+
           await $`echo "Built binaries:"`;
           await $`ls bins`;
 
@@ -494,6 +561,14 @@ jobs:
           mv artifacts/bins-*/* bins/
           mv artifacts/build dist/
 
+          mkdir -p ./bins/win-x64-cuda/fallback
+          mv ./bins/win-x64-cuda-2/ggml-cuda.dll ./bins/win-x64-cuda/fallback/ggml-cuda.dll
+          rm -rf ./bins/win-x64-cuda-2
+
+          mkdir -p ./bins/linux-x64-cuda/fallback
+          mv ./bins/linux-x64-cuda-2/libggml-cuda.so ./bins/linux-x64-cuda/fallback/libggml-cuda.so
+          rm -rf ./bins/linux-x64-cuda-2
+
           cp -r artifacts/llama.cpp/llama.cpp/grammars llama/grammars
 
           rm -f ./llama/binariesGithubRelease.json
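
The release job above merges the two CUDA builds: the CUDA 13 backend produced by the secondary jobs is moved into a `fallback` folder inside the regular CUDA binary folder. Below is a minimal Node.js sketch of that rearrangement using `fs-extra`; the paths and folder names mirror the shell steps above, and the function name is illustrative.

```ts
import path from "path";
import fs from "fs-extra";

// Sketch only: move the CUDA 13 backend from the "-2" artifact folder
// into a "fallback" subfolder of the CUDA 12.4 binary folder.
async function mergeCudaFallback(binsDirectoryPath: string, platform: "win" | "linux") {
    const mainDir = path.join(binsDirectoryPath, `${platform}-x64-cuda`);
    const secondaryDir = path.join(binsDirectoryPath, `${platform}-x64-cuda-2`);
    const backendFile = platform === "win" ? "ggml-cuda.dll" : "libggml-cuda.so";

    await fs.ensureDir(path.join(mainDir, "fallback"));
    await fs.move(
        path.join(secondaryDir, backendFile),
        path.join(mainDir, "fallback", backendFile)
    );
    await fs.remove(secondaryDir);
}

await mergeCudaFallback("./bins", "win");
await mergeCudaFallback("./bins", "linux");
```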

docs/guide/CUDA.md

Lines changed: 22 additions & 3 deletions
@@ -43,6 +43,7 @@ If you see `CUDA used VRAM` in the output, it means that CUDA support is working
 
 ## Prerequisites
 * [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) 12.4 or higher
+* [NVIDIA Drivers](https://www.nvidia.com/en-us/drivers/)
 * [`cmake-js` dependencies](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake)
 * [CMake](https://cmake.org/download/) 3.26 or higher (optional, recommended if you have build issues)
 

@@ -89,10 +90,15 @@ export CUDACXX=/usr/local/cuda-12.4/bin/nvcc
 export CUDA_PATH=/usr/local/cuda-12.4
 ```
 
-```cmd [Windows]
+```cmd [Windows (cmd)]
 set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin\nvcc.exe
 set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4
 ```
+
+```cmd [Windows (PowerShell)]
+$env:CUDACXX="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin\nvcc.exe"
+$env:CUDA_PATH="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+```
 :::
 
 Then run the build command again to check whether setting the `CUDACXX` and `CUDA_PATH` environment variables fixed the issue.
@@ -110,9 +116,13 @@ To do this, set the `NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET` enviro
 export NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=$CUDA_PATH
 ```
 
-```cmd [Windows]
+```cmd [Windows (cmd)]
 set NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=%CUDA_PATH%
 ```
+
+```cmd [Windows (PowerShell)]
+$env:NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=$env:CUDA_PATH
+```
 :::
 
 Then run the build command again to check whether setting the `CMAKE_GENERATOR_TOOLSET` cmake option fixed the issue.
@@ -136,13 +146,22 @@ Run this command inside of your project:
 ldd ./node_modules/@node-llama-cpp/linux-x64-cuda/bins/linux-x64-cuda/libggml-cuda.so
 ```
 
-```cmd [Windows]
+```cmd [Windows (cmd)]
 "C:\Program Files\Git\usr\bin\ldd.exe" node_modules\@node-llama-cpp\win-x64-cuda\bins\win-x64-cuda\ggml-cuda.dll
 ```
+
+```cmd [Windows (PowerShell)]
+& "C:\Program Files\Git\usr\bin\ldd.exe" node_modules\@node-llama-cpp\win-x64-cuda\bins\win-x64-cuda\ggml-cuda.dll
+```
 :::
 
 ::::
 
+### Fix the `ggml_cuda_init: failed to initialize CUDA: (null)` Error {#fix-failed-to-initialize-cuda-null}
+This error usually happens when the NVIDIA drivers installed on your machine are incompatible with the version of CUDA you have installed.
+
+To fix it, update your NVIDIA drivers to the latest version from the [NVIDIA Driver Downloads](https://www.nvidia.com/en-us/drivers/) page.
+
 
 ## Using `node-llama-cpp` With CUDA
 It's recommended to use [`getLlama`](../api/functions/getLlama) without specifying a GPU type,
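
For reference, the troubleshooting flow this guide describes ends with loading the library through `getLlama`. A minimal usage sketch is shown below; it uses node-llama-cpp's documented `getLlama` API, and the explicit `gpu: "cuda"` option is only needed when you want to force the CUDA backend rather than let it auto-detect.

```ts
import {getLlama} from "node-llama-cpp";

// Force the CUDA backend; omit the option to let node-llama-cpp pick the best available GPU.
const llama = await getLlama({gpu: "cuda"});
console.log("GPU type:", llama.gpu); // expected: "cuda" when the CUDA backend loaded successfully
```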

llama/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
@@ -76,6 +76,21 @@ execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
                 OUTPUT_STRIP_TRAILING_WHITESPACE)
 include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})
 
+if (DEFINED GGML_NATIVE)
+    set(NLC_GGML_NATIVE ${GGML_NATIVE})
+elseif(CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH})
+    set(NLC_GGML_NATIVE OFF)
+else()
+    set(NLC_GGML_NATIVE ON)
+endif()
+
+if (GGML_CUDA AND NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT NLC_GGML_NATIVE)
+    find_package(CUDAToolkit)
+    if (CUDAToolkit_FOUND AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "13.0")
+        set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-real;89-real;90-real")
+    endif()
+    endif()
+
 add_subdirectory("llama.cpp")
 include_directories("llama.cpp")
 include_directories("./llama.cpp/common")
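
The new CMake block only pins `CMAKE_CUDA_ARCHITECTURES` for non-native CUDA 13+ builds (the prebuilt-binary case) and never overrides an explicitly provided value. Below is a rough TypeScript sketch of that selection logic, with the version comparison simplified to the major version; the function and parameter names are illustrative and not part of the build scripts.

```ts
// Sketch of the architecture-selection logic added to llama/CMakeLists.txt,
// expressed as a plain function. The "75-virtual;..." list is the one from the diff.
function pickCudaArchitectures(options: {
    cudaToolkitMajorVersion: number,
    nativeBuild: boolean,              // corresponds to GGML_NATIVE / building for the local machine
    explicitArchitectures?: string     // a user-provided CMAKE_CUDA_ARCHITECTURES value
}): string | undefined {
    if (options.explicitArchitectures != null)
        return options.explicitArchitectures; // never override an explicit choice

    if (options.nativeBuild)
        return undefined; // let the toolchain detect the local GPU

    if (options.cudaToolkitMajorVersion >= 13)
        return "75-virtual;80-virtual;86-real;89-real;90-real";

    return undefined; // CUDA 12.x keeps llama.cpp's own defaults
}

console.log(pickCudaArchitectures({cudaToolkitMajorVersion: 13, nativeBuild: false}));
```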

src/bindings/Llama.ts

Lines changed: 19 additions & 4 deletions
@@ -11,7 +11,10 @@ import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js";
 import {ThreadsSplitter} from "../utils/ThreadsSplitter.js";
 import {getLlamaClasses, LlamaClasses} from "../utils/getLlamaClasses.js";
 import {BindingModule} from "./AddonTypes.js";
-import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual, LlamaNuma} from "./types.js";
+import {
+    BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel,
+    LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaNuma
+} from "./types.js";
 import {MemoryOrchestrator, MemoryReservation} from "./utils/MemoryOrchestrator.js";
 
 export const LlamaLogLevelToAddonLogLevel: ReadonlyMap<LlamaLogLevel, number> = new Map([
@@ -41,6 +44,7 @@ export class Llama {
     /** @internal */ public readonly _swapOrchestrator: MemoryOrchestrator;
     /** @internal */ public readonly _debug: boolean;
     /** @internal */ public readonly _threadsSplitter: ThreadsSplitter;
+    /** @internal */ public _hadErrorLogs: boolean = false;
     /** @internal */ private readonly _gpu: LlamaGpuType;
     /** @internal */ private readonly _numa: LlamaNuma;
     /** @internal */ private readonly _buildType: "localBuild" | "prebuilt";
@@ -107,9 +111,17 @@ export class Llama {
         }
 
         bindings.loadBackends();
-        const loadedGpu = bindings.getGpuType();
-        if (loadedGpu == null || (loadedGpu === false && buildGpu !== false))
-            bindings.loadBackends(path.dirname(bindingPath));
+        let loadedGpu = bindings.getGpuType();
+        if (loadedGpu == null || (loadedGpu === false && buildGpu !== false)) {
+            const backendsPath = path.dirname(bindingPath);
+            const fallbackBackendsDir = path.join(backendsPath, "fallback");
+
+            bindings.loadBackends(backendsPath);
+
+            loadedGpu = bindings.getGpuType();
+            if (loadedGpu == null || (loadedGpu === false && buildGpu !== false))
+                bindings.loadBackends(fallbackBackendsDir);
+        }
 
         bindings.ensureGpuDeviceIsSupported();
 
@@ -462,6 +474,9 @@ export class Llama {
 
         this._previousLog = message;
         this._previousLogLevel = level;
+
+        if (!this._hadErrorLogs && LlamaLogLevelGreaterThan(level, LlamaLogLevel.error))
+            this._hadErrorLogs = true;
     }
 
     /** @internal */
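
The backend-loading change retries in two stages: first from the directory containing the binding binary, then from a `fallback` subdirectory (where the release workflow places the CUDA 13 build of the backend). Below is a condensed sketch of that pattern, with the addon surface reduced to the two calls involved; the real interface is `BindingModule` from `./AddonTypes.js`.

```ts
import path from "path";

// Minimal, illustrative view of the addon surface used by this logic.
type GpuType = "cuda" | "vulkan" | "metal" | false;
interface BackendLoader {
    loadBackends(backendsDir?: string): void,
    getGpuType(): GpuType | undefined
}

function loadBackendsWithFallback(bindings: BackendLoader, bindingPath: string, buildGpu: GpuType) {
    bindings.loadBackends(); // first try the default search path

    let loadedGpu = bindings.getGpuType();
    if (loadedGpu == null || (loadedGpu === false && buildGpu !== false)) {
        const backendsPath = path.dirname(bindingPath);

        // retry next to the binding binary, then from its "fallback" folder
        bindings.loadBackends(backendsPath);

        loadedGpu = bindings.getGpuType();
        if (loadedGpu == null || (loadedGpu === false && buildGpu !== false))
            bindings.loadBackends(path.join(backendsPath, "fallback"));
    }
}
```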

src/bindings/types.ts

Lines changed: 11 additions & 2 deletions
@@ -117,14 +117,23 @@ export const LlamaVocabularyTypeValues = Object.freeze([
 ] as const);
 
 /**
- *Check if a log level is higher than another log level
+ * Check if a log level is higher than another log level
+ * @example
+ * ```ts
+ * LlamaLogLevelGreaterThan(LlamaLogLevel.error, LlamaLogLevel.info); // true
+ * ```
  */
 export function LlamaLogLevelGreaterThan(a: LlamaLogLevel, b: LlamaLogLevel): boolean {
     return LlamaLogLevelValues.indexOf(a) < LlamaLogLevelValues.indexOf(b);
 }
 
 /**
- *Check if a log level is higher than or equal to another log level
+ * Check if a log level is higher than or equal to another log level
+ * @example
+ * ```ts
+ * LlamaLogLevelGreaterThanOrEqual(LlamaLogLevel.error, LlamaLogLevel.info); // true
+ * LlamaLogLevelGreaterThanOrEqual(LlamaLogLevel.error, LlamaLogLevel.error); // true
+ * ```
  */
 export function LlamaLogLevelGreaterThanOrEqual(a: LlamaLogLevel, b: LlamaLogLevel): boolean {
     return LlamaLogLevelValues.indexOf(a) <= LlamaLogLevelValues.indexOf(b);
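
Both helpers rely on `LlamaLogLevelValues` being ordered from most to least severe, so a smaller index means a "greater" (more severe) level. Below is a standalone sketch of that comparison with an illustrative ordering; the real enum and value list live in this file.

```ts
// Illustrative ordering only; the real values come from the LlamaLogLevel enum.
const logLevelValues = ["disabled", "fatal", "error", "warn", "info", "log", "debug"] as const;
type LogLevel = (typeof logLevelValues)[number];

// "greater" means more severe, i.e. an earlier position in the ordered list
function logLevelGreaterThan(a: LogLevel, b: LogLevel): boolean {
    return logLevelValues.indexOf(a) < logLevelValues.indexOf(b);
}

console.log(logLevelGreaterThan("error", "info")); // true
console.log(logLevelGreaterThan("info", "error")); // false
```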

src/bindings/utils/compileLLamaCpp.ts

Lines changed: 5 additions & 0 deletions
@@ -245,6 +245,7 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions
     else if (buildOptions.gpu === "cuda") {
         if (!ignoreWorkarounds.includes("cudaArchitecture") && (platform === "win" || platform === "linux") &&
             err instanceof SpawnError && (
+                err.combinedStd.toLowerCase().includes("CUDA Toolkit not found".toLowerCase()) ||
                 err.combinedStd.toLowerCase().includes("Failed to detect a default CUDA architecture".toLowerCase()) ||
                 err.combinedStd.toLowerCase().includes("CMAKE_CUDA_COMPILER-NOTFOUND".toLowerCase()) || (
                     err.combinedStd.toLowerCase().includes(
@@ -253,6 +254,10 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions
                     err.combinedStd.toLowerCase().includes(
                         'variable "CUDACXX" or the CMake cache entry CMAKE_CUDA_COMPILER to the full'.toLowerCase()
                     )
+                ) || (
+                    err.combinedStd.toLowerCase().includes("The CUDA compiler".toLowerCase()) &&
+                    err.combinedStd.toLowerCase().includes("is not able to compile a simple test program".toLowerCase()) &&
+                    err.combinedStd.toLowerCase().includes("nvcc fatal".toLowerCase())
                 )
             )) {
             for (const {nvccPath, cudaHomePath} of await getCudaNvccPaths()) {
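
The added conditions extend a case-insensitive substring check over the failed build's combined stdout/stderr, which decides whether to retry the build with an explicitly located nvcc. Below is a simplified sketch of that matching approach; the helper name is illustrative, while the matched strings are the ones from the diff.

```ts
// Illustrative helper: does the captured build output look like one of the known
// CUDA toolchain failures that warrant retrying with an explicit nvcc path?
function looksLikeCudaToolchainFailure(combinedStd: string): boolean {
    const output = combinedStd.toLowerCase();
    const has = (text: string) => output.includes(text.toLowerCase());

    return has("CUDA Toolkit not found") ||
        has("Failed to detect a default CUDA architecture") ||
        has("CMAKE_CUDA_COMPILER-NOTFOUND") || (
            // all three fragments must appear for the "simple test program" case
            has("The CUDA compiler") &&
            has("is not able to compile a simple test program") &&
            has("nvcc fatal")
        );
}

console.log(looksLikeCudaToolchainFailure("-- The CUDA compiler identification is unknown"));
// false: the "simple test program" case also requires "nvcc fatal" in the output
```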

src/bindings/utils/testBindingBinary.ts

Lines changed: 11 additions & 3 deletions
@@ -286,9 +286,17 @@ if (process.env.TEST_BINDING_CP === "true" && (process.parentPort != null || pro
         throw new Error("Binding binary is not loaded");
 
     binding.loadBackends();
-    const loadedGpu = binding.getGpuType();
-    if (loadedGpu == null || (loadedGpu === false && message.gpu !== false))
-        binding.loadBackends(path.dirname(path.resolve(message.bindingBinaryPath)));
+    let loadedGpu = binding.getGpuType();
+    if (loadedGpu == null || (loadedGpu === false && message.gpu !== false)) {
+        const backendsPath = path.dirname(path.resolve(message.bindingBinaryPath));
+        const fallbackBackendsDir = path.join(backendsPath, "fallback");
+
+        binding.loadBackends(backendsPath);
+
+        loadedGpu = binding.getGpuType();
+        if (loadedGpu == null || (loadedGpu === false && message.gpu !== false))
+            binding.loadBackends(fallbackBackendsDir);
+    }
 
     await binding.init();
     binding.getGpuVramInfo();
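
This file applies the same two-stage fallback inside a helper process, so a native backend that crashes while being probed cannot take down the parent process. Below is a generic sketch of that isolation idea using `child_process.fork`; the worker module path and message shape are illustrative and not this file's actual protocol.

```ts
import {fork} from "node:child_process";

// Illustrative only: run a risky native-binding probe in a child process and
// report failure instead of crashing the caller if the child dies.
async function probeBindingInSubprocess(workerModulePath: string, bindingBinaryPath: string): Promise<boolean> {
    return await new Promise<boolean>((resolve) => {
        const child = fork(workerModulePath, [], {
            env: {...process.env, TEST_BINDING_CP: "true"}
        });

        child.once("message", (result: {success: boolean}) => {
            resolve(result.success);
            child.kill();
        });
        child.once("exit", () => resolve(false)); // no result message: treat as a failed probe
        child.send({bindingBinaryPath});
    });
}
```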
