Skip to content

Commit 046b8ac

Browse files
committed
fix: prebuilt binaries CUDA 13 support
1 parent 12749c0 commit 046b8ac

File tree

3 files changed

+81
-23
lines changed

3 files changed

+81
-23
lines changed

.github/workflows/build.yml

Lines changed: 59 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -56,15 +56,18 @@ jobs:
5656
fail-fast: false
5757
matrix:
5858
config:
59-
- name: "Windows for x64"
59+
- name: "Windows (1)"
6060
os: windows-2022
61-
artifact: "win-x64"
62-
- name: "Windows for Arm"
61+
artifact: "win-1"
62+
- name: "Windows (2)"
6363
os: windows-2022
64-
artifact: "win-arm"
65-
- name: "Ubuntu"
64+
artifact: "win-2"
65+
- name: "Ubuntu (1)"
66+
os: ubuntu-22.04
67+
artifact: "linux-1"
68+
- name: "Ubuntu (2)"
6669
os: ubuntu-22.04
67-
artifact: "linux"
70+
artifact: "linux-2"
6871
- name: "macOS x64"
6972
os: macos-13
7073
artifact: "mac-x64"
@@ -98,7 +101,7 @@ jobs:
98101
choco install ninja
99102
100103
- name: Install dependencies on Ubuntu
101-
if: matrix.config.name == 'Ubuntu'
104+
if: matrix.config.name == 'Ubuntu (1)' || matrix.config.name == 'Ubuntu (2)'
102105
run: |
103106
sudo apt-get update
104107
sudo apt-get install ninja-build cmake libtbb-dev g++-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf
@@ -109,24 +112,40 @@ jobs:
109112
which arm-linux-gnueabihf-gcc
110113
which arm-linux-gnueabihf-g++
111114
112-
- name: Install Cuda on Windows for x64
113-
if: matrix.config.name == 'Windows for x64'
115+
- name: Install Cuda 12.4 on Windows (1)
116+
if: matrix.config.name == 'Windows (1)'
114117
uses: Jimver/cuda-toolkit@[version tag lost in extraction]
115118
with:
116119
cuda: '12.4.0'
117120
method: 'network'
118121
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
119122
use-local-cache: false
120123

121-
- name: Install Cuda on Ubuntu
122-
if: matrix.config.name == 'Ubuntu'
124+
- name: Install Cuda 13.0 on Windows (2)
125+
if: matrix.config.name == 'Windows (2)'
126+
uses: Jimver/cuda-toolkit@[version tag lost in extraction]
127+
with:
128+
cuda: '13.0.0'
129+
method: 'network'
130+
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
131+
use-local-cache: false
132+
133+
- name: Install Cuda 12.4 on Ubuntu
134+
if: matrix.config.name == 'Ubuntu (1)'
123135
uses: Jimver/cuda-toolkit@[version tag lost in extraction]
124136
with:
125137
cuda: '12.4.0'
126138
method: 'network'
127139

128-
- name: Install Vulkan SDK on Windows for x64
129-
if: matrix.config.name == 'Windows for x64'
140+
- name: Install Cuda 13.0 on Ubuntu
141+
if: matrix.config.name == 'Ubuntu (2)'
142+
uses: Jimver/cuda-toolkit@[version tag lost in extraction]
143+
with:
144+
cuda: '13.0.0'
145+
method: 'network'
146+
147+
- name: Install Vulkan SDK on Windows (1)
148+
if: matrix.config.name == 'Windows (1)'
130149
shell: powershell
131150
env:
132151
VULKAN_VERSION: 1.3.261.1
@@ -137,7 +156,7 @@ jobs:
137156
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
138157
139158
- name: Install Vulkan SDK on Ubuntu
140-
if: matrix.config.name == 'Ubuntu'
159+
if: matrix.config.name == 'Ubuntu (1)'
141160
run: |
142161
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
143162
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
@@ -205,18 +224,21 @@ jobs:
205224
}
206225
207226
// build binaries
208-
if (process.env.ARTIFACT_NAME === "win-x64") {
227+
if (process.env.ARTIFACT_NAME === "win-1") {
209228
await buildBinary("x64", ["--gpu", "false"]);
210229
await buildBinary("x64", ["--gpu", "cuda"]);
211230
await buildBinary("x64", ["--gpu", "vulkan"]);
212-
} else if (process.env.ARTIFACT_NAME === "win-arm") {
231+
} else if (process.env.ARTIFACT_NAME === "win-2") {
213232
await buildBinary("arm64", ["--gpu", "false"], windowsOnArmNodeVersion);
214-
} else if (process.env.ARTIFACT_NAME === "linux") {
233+
await buildBinary("x64", ["--gpu", "cuda"]);
234+
} else if (process.env.ARTIFACT_NAME === "linux-1") {
215235
await buildBinary("x64", ["--gpu", "false"]);
216236
await buildBinary("x64", ["--gpu", "cuda"]);
217237
await buildBinary("x64", ["--gpu", "vulkan"]);
218238
await buildBinary("arm64", ["--gpu", "false"]);
219239
await buildBinary("armv7l", ["--gpu", "false"]);
240+
} else if (process.env.ARTIFACT_NAME === "linux-2") {
241+
await buildBinary("x64", ["--gpu", "cuda"]);
220242
} else if (process.env.ARTIFACT_NAME === "mac-x64") {
221243
await buildBinary("x64", ["--gpu", "false"]);
222244
} else if (process.env.ARTIFACT_NAME === "mac-arm64") {
@@ -233,6 +255,18 @@ jobs:
233255
);
234256
}
235257
258+
if (process.env.ARTIFACT_NAME === "win-2") {
259+
await fs.move(
260+
path.join(llamaBinsDirectoryPath, "win-x64-cuda"),
261+
path.join(llamaBinsDirectoryPath, "win-x64-cuda-2")
262+
);
263+
} else if (process.env.ARTIFACT_NAME === "linux-2") {
264+
await fs.move(
265+
path.join(llamaBinsDirectoryPath, "linux-x64-cuda"),
266+
path.join(llamaBinsDirectoryPath, "linux-x64-cuda-2")
267+
);
268+
}
269+
236270
await $`echo "Built binaries:"`;
237271
await $`ls bins`;
238272
@@ -494,6 +528,14 @@ jobs:
494528
mv artifacts/bins-*/* bins/
495529
mv artifacts/build dist/
496530
531+
mkdir -p ./bins/win-x64-cuda/bins/win-x64-cuda/fallback
532+
mv ./bins/win-x64-cuda-2/bins/win-x64-cuda/ggml-cuda.dll bins/win-x64-cuda/bins/win-x64-cuda/fallback/ggml-cuda.dll
533+
rm -rf ./bins/win-x64-cuda-2
534+
535+
mkdir -p ./bins/linux-x64-cuda/bins/linux-x64-cuda/fallback
536+
mv ./bins/linux-x64-cuda-2/bins/linux-x64-cuda/libggml-cuda.so bins/linux-x64-cuda/bins/linux-x64-cuda/fallback/libggml-cuda.so
537+
rm -rf ./bins/linux-x64-cuda-2
538+
497539
cp -r artifacts/llama.cpp/llama.cpp/grammars llama/grammars
498540
499541
rm -f ./llama/binariesGithubRelease.json

src/bindings/Llama.ts

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -107,9 +107,17 @@ export class Llama {
107107
}
108108

109109
bindings.loadBackends();
110-
const loadedGpu = bindings.getGpuType();
111-
if (loadedGpu == null || (loadedGpu === false && buildGpu !== false))
112-
bindings.loadBackends(path.dirname(bindingPath));
110+
let loadedGpu = bindings.getGpuType();
111+
if (loadedGpu == null || (loadedGpu === false && buildGpu !== false)) {
112+
const backendsPath = path.dirname(bindingPath);
113+
const fallbackBackendsDir = path.join(backendsPath, "fallback");
114+
115+
bindings.loadBackends(backendsPath);
116+
117+
loadedGpu = bindings.getGpuType();
118+
if (loadedGpu == null || (loadedGpu === false && buildGpu !== false))
119+
bindings.loadBackends(fallbackBackendsDir);
120+
}
113121

114122
bindings.ensureGpuDeviceIsSupported();
115123

src/bindings/utils/testBindingBinary.ts

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -286,9 +286,17 @@ if (process.env.TEST_BINDING_CP === "true" && (process.parentPort != null || pro
286286
throw new Error("Binding binary is not loaded");
287287

288288
binding.loadBackends();
289-
const loadedGpu = binding.getGpuType();
290-
if (loadedGpu == null || (loadedGpu === false && message.gpu !== false))
291-
binding.loadBackends(path.dirname(path.resolve(message.bindingBinaryPath)));
289+
let loadedGpu = binding.getGpuType();
290+
if (loadedGpu == null || (loadedGpu === false && message.gpu !== false)) {
291+
const backendsPath = path.dirname(path.resolve(message.bindingBinaryPath));
292+
const fallbackBackendsDir = path.join(backendsPath, "fallback");
293+
294+
binding.loadBackends(backendsPath);
295+
296+
loadedGpu = binding.getGpuType();
297+
if (loadedGpu == null || (loadedGpu === false && message.gpu !== false))
298+
binding.loadBackends(fallbackBackendsDir);
299+
}
292300

293301
await binding.init();
294302
binding.getGpuVramInfo();

0 commit comments

Comments
 (0)