From 32d763ba4ccd9cfce44c5d2045685ddd7b76301c Mon Sep 17 00:00:00 2001 From: Xiaodong Ye Date: Mon, 10 Mar 2025 09:41:35 +0800 Subject: [PATCH] musa: support new arch mp_31 and update doc Signed-off-by: Xiaodong Ye --- Makefile | 2 +- docs/build.md | 46 +++++++++++++++++++++++-------- ggml/src/ggml-musa/CMakeLists.txt | 2 +- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 5339d490b4e68..1f9455eff0aec 100644 --- a/Makefile +++ b/Makefile @@ -836,7 +836,7 @@ ifdef GGML_MUSA else MUSA_PATH ?= /opt/musa endif - MUSA_ARCHITECTURES ?= 21;22 + MUSA_ARCHITECTURES ?= 21;22;31 MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib diff --git a/docs/build.md b/docs/build.md index 3d8333328fce0..2e3975c145360 100644 --- a/docs/build.md +++ b/docs/build.md @@ -197,28 +197,52 @@ The following compilation options are also available to tweak performance: ## MUSA -This provides GPU acceleration using the MUSA cores of your Moore Threads MTT GPU. Make sure to have the MUSA SDK installed. You can download it from here: [MUSA SDK](https://developer.mthreads.com/sdk/download/musa). +This provides GPU acceleration using a Moore Threads GPU. Make sure to have the [MUSA SDK](https://developer.mthreads.com/musa/musa-sdk) installed. -- Using `CMake`: +#### Download directly from Moore Threads - ```bash - cmake -B build -DGGML_MUSA=ON - cmake --build build --config Release - ``` +You may find the official downloads here: [Moore Threads developer site](https://developer.mthreads.com/sdk/download/musa). - For static build: +### Compilation - ```bash +```bash +cmake -B build -DGGML_MUSA=ON +cmake --build build --config Release +``` + +#### Override Compute Capability Specifications + +By default, all supported compute capabilities are enabled. To customize this behavior, you can specify the `MUSA_ARCHITECTURES` option in the CMake command: + +```bash +cmake -B build -DGGML_MUSA=ON -DMUSA_ARCHITECTURES="21" +``` + +This configuration enables only compute capability `2.1` (MTT S80) during compilation, which can help reduce compilation time. + +#### Compilation options + +Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet. + +- For static builds, add `-DBUILD_SHARED_LIBS=OFF` and `-DCMAKE_POSITION_INDEPENDENT_CODE=ON`: + ``` cmake -B build -DGGML_MUSA=ON \ -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON cmake --build build --config Release ``` -The environment variable [`MUSA_VISIBLE_DEVICES`](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) can be used to specify which GPU(s) will be used. +### Runtime MUSA environmental variables -The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. +You may set the [musa environmental variables](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) at runtime. -Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet. +```bash +# Use `MUSA_VISIBLE_DEVICES` to hide the first compute device. +MUSA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf +``` + +### Unified Memory + +The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. ## HIP diff --git a/ggml/src/ggml-musa/CMakeLists.txt b/ggml/src/ggml-musa/CMakeLists.txt index 2c75abf61d672..166970ca6bfb8 100644 --- a/ggml/src/ggml-musa/CMakeLists.txt +++ b/ggml/src/ggml-musa/CMakeLists.txt @@ -21,7 +21,7 @@ if (MUSAToolkit_FOUND) message(STATUS "MUSA Toolkit found") if (NOT DEFINED MUSA_ARCHITECTURES) - set(MUSA_ARCHITECTURES "21;22") + set(MUSA_ARCHITECTURES "21;22;31") endif() message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")