Skip to content

Commit dd35126

Browse files
committed
tweak rocm.sh to make building ggml-rocm.so easier
1 parent ac289b3 commit dd35126

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

llamafile/rocm.sh

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@
55
# NVCUDA DLLs are provided by the installation of the windows GPU
66
# driver on a Windows system that has a CUDA-capable GPU installed.
77

8+
TMP=$(mktemp -d) || exit
9+
10+
cp whisper.cpp/ggml-cuda.cu \
11+
whisper.cpp/ggml-cuda.h \
12+
whisper.cpp/ggml-impl.h \
13+
whisper.cpp/ggml-alloc.h \
14+
whisper.cpp/ggml-common.h \
15+
whisper.cpp/ggml-backend.h \
16+
whisper.cpp/ggml-backend-impl.h \
17+
whisper.cpp/ggml.h \
18+
llamafile/tinyblas.h \
19+
llamafile/tinyblas.cu \
20+
llamafile/llamafile.h \
21+
llamafile/rocm.bat \
22+
llamafile/rocm.sh \
23+
llamafile/cuda.bat \
24+
llamafile/cuda.sh \
25+
"$TMP" || exit
26+
27+
cd "$TMP"
28+
829
hipcc \
930
-O3 \
1031
-fPIC \
@@ -22,6 +43,6 @@ hipcc \
2243
-DK_QUANTS_PER_ITERATION=2 \
2344
-DGGML_MINIMIZE_CODE_SIZE=1 \
2445
-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
25-
--amdgpu-target=gfx1100,gfx1031,gfx1030,gfx1032,gfx906,gfx1101,gfx1102,gfx1103 \
26-
-o ggml-rocm.so \
46+
--offload-arch=gfx1100,gfx1031,gfx1030,gfx1032,gfx906,gfx1101,gfx1102,gfx1103 \
47+
-o ~/ggml-rocm.so \
2748
ggml-cuda.cu

0 commit comments

Comments (0)