Skip to content

Commit dd35126

Browse files
committed
tweak rocm.sh to make building ggml-rocm.so easier
1 parent ac289b3 commit dd35126

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

llamafile/rocm.sh

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@
55
# NVCUDA DLLs are provided by the installation of the windows GPU
66
# driver on a Windows system that has a CUDA-capable GPU installed.
77

8+
TMP=$(mktemp -d) || exit
9+
10+
cp whisper.cpp/ggml-cuda.cu \
11+
whisper.cpp/ggml-cuda.h \
12+
whisper.cpp/ggml-impl.h \
13+
whisper.cpp/ggml-alloc.h \
14+
whisper.cpp/ggml-common.h \
15+
whisper.cpp/ggml-backend.h \
16+
whisper.cpp/ggml-backend-impl.h \
17+
whisper.cpp/ggml.h \
18+
llamafile/tinyblas.h \
19+
llamafile/tinyblas.cu \
20+
llamafile/llamafile.h \
21+
llamafile/rocm.bat \
22+
llamafile/rocm.sh \
23+
llamafile/cuda.bat \
24+
llamafile/cuda.sh \
25+
"$TMP" || exit
26+
27+
cd "$TMP"
28+
829
hipcc \
930
-O3 \
1031
-fPIC \
@@ -22,6 +43,6 @@ hipcc \
2243
-DK_QUANTS_PER_ITERATION=2 \
2344
-DGGML_MINIMIZE_CODE_SIZE=1 \
2445
-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
25-
--amdgpu-target=gfx1100,gfx1031,gfx1030,gfx1032,gfx906,gfx1101,gfx1102,gfx1103 \
26-
-o ggml-rocm.so \
46+
--offload-arch=gfx1100,gfx1031,gfx1030,gfx1032,gfx906,gfx1101,gfx1102,gfx1103 \
47+
-o ~/ggml-rocm.so \
2748
ggml-cuda.cu

0 commit comments

Comments (0)