Skip to content

Commit 96d2518

Browse files
authored
Add musa backend (#1995)
1 parent be7d5a5 commit 96d2518

File tree

124 files changed

+17238
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+17238
-0
lines changed

backends/musa/CMakeLists.txt

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# Copyright (c) 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All
2+
# rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
# use this file except in compliance with the License. You may obtain a copy of
6+
# the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
# License for the specific language governing permissions and limitations under
14+
# the License
15+
16+
# Project bootstrap: declare the project, extend the module search path,
# locate the MUSA package and define the plugin identity and Paddle paths.
cmake_minimum_required(VERSION 3.10)
17+
18+
project(paddle-musa CXX C)
19+
20+
# Let include()/find_package() see the cmake/ directory at the top of the tree.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
21+
22+
# MUSA install prefix. It is a cache entry, so -DMUSA_PATH=<dir> overrides the
# default below.
set(MUSA_PATH
23+
"/usr/local/musa"
24+
CACHE PATH "Path to which musa has been installed")
25+
# Also search ${MUSA_PATH}/cmake for modules (presumably where the MUSA find
# module lives - confirm against the MUSA toolkit layout).
list(APPEND CMAKE_MODULE_PATH "${MUSA_PATH}/cmake")
26+
find_package(MUSA REQUIRED)
27+
28+
# PLUGIN_NAME is used below as the name of the shared-library target.
set(PLUGIN_NAME "paddle-musa")
29+
set(PLUGIN_VERSION "1.0.0")
30+
# The Paddle checkout is expected two directories above this file. The value
# ends with '/', so paths derived from it contain a doubled slash.
set(PADDLE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../Paddle/")
31+
set(THIRD_PARTY_PATH
32+
"${PADDLE_SOURCE_DIR}/build/third_party"
33+
CACHE PATH "Third party libraries directory.")
34+
35+
# User-facing build switches. option() creates BOOL cache entries, so each one
# can be set from the command line with -D<NAME>=ON|OFF.
# WITH_MKL and WITH_COVERAGE are not referenced again in this file; presumably
# they are read by the included helper modules - confirm.
option(WITH_TESTING "compile with unit testing" ON)
36+
option(ON_INFER "compile with inference c++ lib" OFF)
37+
option(WITH_MKL "compile with mkl support" ON)
38+
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
39+
option(WITH_MUSA "Compile PaddlePaddle with MUSA" ON)
40+
option(WITH_MCCL "Compile PaddlePaddle with MCCL support" ON)
41+
42+
# Configure-time report of the toolchain that was detected by project().
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
43+
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
44+
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
45+
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
46+
message(STATUS "AR tools: ${CMAKE_AR}")
47+
48+
# Reconcile options that depend on each other before any module reads them.
# NO_PADDLE_SUBMODULE is not used again in this file; presumably it is read by
# an included module - confirm.
if(NOT WITH_TESTING)
49+
set(NO_PADDLE_SUBMODULE ON)
50+
endif()
51+
52+
# MCCL cannot be used without MUSA, so switch it off and tell the user.
if(NOT WITH_MUSA AND WITH_MCCL)
53+
message(
54+
WARNING "Disable MCCL when compiling without MUSA. Force WITH_MCCL=OFF.")
55+
set(WITH_MCCL
56+
OFF
57+
CACHE BOOL "Disable MCCL when compiling without MUSA" FORCE) # BOOL keeps the type option() gave this entry
58+
endif()
59+
60+
# build shared library
61+
# Helper modules, resolved through CMAKE_MODULE_PATH. Their order is kept as
# is because later modules may rely on variables set by earlier ones.
include(utils)
62+
include(paddle)
63+
include(external/eigen)
64+
include(version)
65+
include(generic)
66+
# Directory-scoped include paths: they apply to every target defined in this
# directory, not only to the plugin.
include_directories(${CMAKE_SOURCE_DIR})
67+
# PADDLE_INC_DIR is not set in this file; presumably one of the modules
# included above defines it - confirm.
include_directories(
68+
${PADDLE_INC_DIR}
69+
${CMAKE_SOURCE_DIR}
70+
${CMAKE_SOURCE_DIR}/kernels
71+
${CMAKE_SOURCE_DIR}/kernels/paddle_kernels
72+
${CMAKE_SOURCE_DIR}/hack/cuda_hack
73+
${CMAKE_SOURCE_DIR}/hack/thrust_hack
74+
${CMAKE_SOURCE_DIR}/hack/paddle_backend_dyload_hack
75+
${CMAKE_SOURCE_DIR}/hack/paddle_platform_dyload_hack)
76+
include_directories(${PADDLE_SOURCE_DIR})
77+
# Hand-picked Paddle GPU kernel sources that are compiled into the plugin.
# Entries that are commented out are excluded from the build. Because
# file(GLOB) is used with literal paths, a path that does not exist is dropped
# from the result instead of causing a configure error.
file(
78+
GLOB
79+
PADDLE_KERNEL_SRCS
80+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/abs_kernel.cu
81+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/abs_grad_kernel.cu
82+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_entropy2_kernel.cu
83+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_entropy2_grad_kernel.cu
84+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_kernel.cu
85+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_grad_kernel.cu
86+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/activation_kernel.cu
87+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/activation_kernel.cc
88+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/activation_grad_kernel.cu
89+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adam_kernel.cu
90+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adamw_kernel.cu
91+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
92+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adamax_kernel.cu
93+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/angle_grad_kernel.cu
94+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/angle_kernel.cu)
95+
# The plugin's own .cc sources, searched recursively. RELATIVE makes the
# resulting paths relative to CMAKE_SOURCE_DIR. The globs are evaluated at
# configure time only, so new files need a re-run of cmake to be picked up.
file(
96+
GLOB_RECURSE PLUGIN_SRCS
97+
RELATIVE ${CMAKE_SOURCE_DIR}
98+
${CMAKE_SOURCE_DIR}/runtime/*.cc kernels/*.cc kernels/paddle_kernels/*.cc
99+
# kernels/paddle_kernels/*.mu
100+
hack/paddle_backend_dyload_hack/*.cc hack/paddle_platform_dyload_hack/*.cc)
101+
# Full source list handed to the library target.
set(CUSTOM_DEVICE_SRCS ${PLUGIN_SRCS} ${PADDLE_KERNEL_SRCS})
102+
# target_include_directories(${PLUGIN_NAME} PUBLIC
103+
# ${CMAKE_SOURCE_DIR}/hack/paddle_backend_dyload_hack)
104+
# Load the MUSA and muDNN helper modules, then create the shared library.
# musa_add_library is not defined in this file; presumably it comes from the
# MUSA package or the musa module - confirm.
include(musa)
105+
include(mudnn)
106+
musa_add_library(${PLUGIN_NAME} SHARED ${CUSTOM_DEVICE_SRCS})
107+
# PUBLIC: these include paths are used for the plugin itself and are also
# propagated to targets that link against it.
target_include_directories(
108+
${PLUGIN_NAME} PUBLIC ${CMAKE_SOURCE_DIR}/hack/cuda_hack
109+
${CMAKE_SOURCE_DIR}/hack/thrust_hack)
110+
# NOTE(review): link_directories() only affects targets created after this
# call, so it does not apply to ${PLUGIN_NAME}, which already exists here.
# Confirm whether it is intended for targets added later (e.g. under tests/).
link_directories(${PADDLE_LIB_DIR})
111+
112+
# link paddle core lib: the inference library when ON_INFER is set, otherwise the library named by PADDLE_CORE_LIB
113+
if(ON_INFER)
114+
target_link_directories(${PLUGIN_NAME} PUBLIC ${PADDLE_INFERENCE_LIB_DIR}) # scope keyword is mandatory; command needs CMake >= 3.13
115+
target_link_libraries(${PLUGIN_NAME} paddle_inference) # plain signature kept to match the other link calls on this target
116+
else()
117+
target_link_libraries(${PLUGIN_NAME} ${PADDLE_CORE_LIB})
118+
# target_link_libraries(${PLUGIN_NAME} PRIVATE pybind)
119+
endif()
120+
121+
# link musa rt mudnn
122+
# musart_lib and mudnn_lib are not set in this file; presumably they are
# defined by include(musa) / include(mudnn) - confirm.
target_link_libraries(${PLUGIN_NAME} ${musart_lib} ${mudnn_lib})
123+
# target_link_libraries(${PLUGIN_NAME} ${mudnn_lib})
124+
125+
# link mccl
126+
if(WITH_MCCL)
127+
# Directory-scoped definition: PADDLE_WITH_MCCL is added to the compile line
# of the targets in this directory, not only to the plugin.
add_definitions("-DPADDLE_WITH_MCCL")
128+
# mccl_lib is presumably set by the mccl module - confirm.
include(mccl)
129+
target_link_libraries(${PLUGIN_NAME} ${mccl_lib})
130+
else()
131+
# Without MCCL the build still proceeds; the user is only warned.
if(WITH_MUSA)
132+
message(
133+
WARNING
134+
"If the environment is multi-card, the WITH_MCCL option needs to be turned on, otherwise only a single card can be used."
135+
)
136+
endif()
137+
endif()
138+
139+
# link third party
140+
# add_dependencies only orders the build (third_party is built before the
# plugin); the actual linking is done by target_link_libraries below.
include(third_party)
141+
add_dependencies(${PLUGIN_NAME} third_party)
142+
target_link_libraries(${PLUGIN_NAME} eigen3 gflags glog)
143+
# Preprocessor definitions for the plugin. PUBLIC means they also propagate to
# targets that link against it. Several entries alias one identifier to
# another (e.g. __NVCC__ to __MUSACC__, GPUContext/KPSContext to
# CustomContext); presumably this lets Paddle's CUDA-oriented sources compile
# for MUSA - confirm. MARCH_TYPE is hard-coded, see the TODO.
target_compile_definitions(
144+
${PLUGIN_NAME}
145+
PUBLIC __NVCC__=__MUSACC__
146+
__CUDACC_VER_MAJOR__=99
147+
__CUDACC_VER_MINOR__=10000
148+
MARCH_TYPE=310 # TODO: the version num should be obtained automatically
149+
# here
150+
GPUContext=CustomContext
151+
KPSContext=CustomContext
152+
STREAM_TYPE=musaStream_t
153+
EVENT_TYPE=musaEvent_t
154+
EIGEN_USE_GPU=1
155+
PADDLE_WITH_MUSA=1
156+
PADDLE_WITH_CUSTOM_DEVICE=1
157+
EIGEN_USE_MUSA=1)
158+
159+
# packing wheel package: generate setup.py, then stage the built plugin under python/ after each (re)link
160+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
161+
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
162+
163+
add_custom_command(
164+
TARGET ${PLUGIN_NAME}
165+
POST_BUILD
166+
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_BINARY_DIR}/python # 'remove' cannot delete a directory; a missing one is ignored
167+
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/python/
168+
COMMAND ${CMAKE_COMMAND} -E make_directory
169+
${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/
170+
COMMAND
171+
${CMAKE_COMMAND} -E copy_if_different
172+
${CMAKE_CURRENT_BINARY_DIR}/lib${PLUGIN_NAME}.so
173+
${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/
174+
COMMENT "Creating plugin directories------>>>")
175+
176+
# Locate a Python interpreter; Python_EXECUTABLE is used to run setup.py.
find_package(
177+
Python
178+
COMPONENTS Interpreter
179+
REQUIRED)
180+
181+
# Build the wheel from the generated setup.py once the plugin target is built.
# No WORKING_DIRECTORY is given, so the command runs in the build directory
# that corresponds to the current source directory.
# NOTE(review): none of the commands visible here creates
# python/.timestamp, so this rule looks permanently out of date and would be
# re-run on every build - confirm whether setup.py writes that file.
add_custom_command(
182+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/python/.timestamp
183+
COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py bdist_wheel
184+
DEPENDS ${PLUGIN_NAME}
185+
COMMENT "Packing whl packages------>>>")
186+
187+
# ALL makes the wheel part of the default build.
add_custom_target(python_package ALL
188+
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/python/.timestamp)
189+
190+
# Optional test setup: register the tests/ subdirectory and copy the test
# sources into the build tree as part of the default build.
if(WITH_TESTING)
191+
# PYTHON_SOURCE_DIR is not used in this file; presumably tests/CMakeLists.txt
# reads it - confirm.
set(PYTHON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../Paddle")
192+
enable_testing()
193+
add_subdirectory(tests)
194+
# NOTE(review): `cp -r` is a POSIX shell tool (not portable), and the command
# does not create tests/.timestamp itself, so the rule looks permanently out
# of date - confirm whether the copied tree contains that file.
add_custom_command(
195+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tests/.timestamp
196+
COMMAND cp -r ${CMAKE_SOURCE_DIR}/tests ${CMAKE_CURRENT_BINARY_DIR})
197+
add_custom_target(python_tests ALL
198+
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tests/.timestamp)
199+
endif()

backends/musa/README.md

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# PaddlePaddle Custom Device Implementation for Custom CPU
2+
3+
English | [简体中文](./README_cn.md) | [日本語](./README_ja.md)
4+
5+
Please refer to the following steps to compile, install and verify the custom device implementation for Custom CPU.
6+
7+
## Prepare environment and source code
8+
9+
```bash
10+
# 1. pull PaddlePaddle CPU development docker image
11+
# dockerfile of the image is in tools/dockerfile directory
12+
docker pull registry.baidubce.com/device/paddle-cpu:ubuntu20-x86_64-gcc84-py310
13+
docker pull registry.baidubce.com/device/paddle-cpu:ubuntu20-aarch64-gcc84-py310
14+
15+
# 2. refer to the following commands to start docker container
16+
docker run -it --name paddle-dev-cpu -v `pwd`:/workspace \
17+
--network=host --shm-size=128G --workdir=/workspace \
18+
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
19+
registry.baidubce.com/device/paddle-cpu:ubuntu20-$(uname -m)-gcc84-py310 /bin/bash
20+
21+
# 3. clone the source code recursively along with Paddle source code
22+
git clone --recursive https://github.com/PaddlePaddle/PaddleCustomDevice
23+
cd PaddleCustomDevice
24+
25+
# 4. execute the following commands to update submodule
26+
git submodule sync
27+
git submodule update --remote --init --recursive
28+
```
29+
30+
## Compile and Install
31+
32+
```bash
33+
# navigate to implementaion for Custom CPU
34+
cd backends/custom_cpu
35+
36+
# before compiling, ensure that Paddle is installed; if it is not, run the following command
37+
pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
38+
39+
# create the build directory and navigate in
40+
mkdir build && cd build
41+
42+
cmake ..
43+
make -j8
44+
45+
# using pip to install the output
46+
pip install dist/paddle_custom_cpu*.whl
47+
```
48+
49+
## Verification
50+
51+
```bash
52+
# list available hardware backends
53+
python -c "import paddle; print(paddle.device.get_all_custom_device_type())"
54+
55+
# expected output
56+
['custom_cpu']
57+
58+
# run a simple model
59+
python ../tests/test_MNIST_model.py
60+
61+
# expected similar output
62+
... ...
63+
Epoch 0 step 0, Loss = [2.2956038], Accuracy = 0.15625
64+
Epoch 0 step 100, Loss = [2.1552896], Accuracy = 0.3125
65+
Epoch 0 step 200, Loss = [2.1177733], Accuracy = 0.4375
66+
Epoch 0 step 300, Loss = [2.0089214], Accuracy = 0.53125
67+
Epoch 0 step 400, Loss = [2.0845466], Accuracy = 0.421875
68+
Epoch 0 step 500, Loss = [2.0473], Accuracy = 0.453125
69+
Epoch 0 step 600, Loss = [1.8561764], Accuracy = 0.71875
70+
Epoch 0 step 700, Loss = [1.9915285], Accuracy = 0.53125
71+
Epoch 0 step 800, Loss = [1.8925955], Accuracy = 0.640625
72+
Epoch 0 step 900, Loss = [1.8199624], Accuracy = 0.734375
73+
```
74+
75+
## Using PaddleInference
76+
77+
Re-compile plugin
78+
79+
```bash
80+
# Compile PaddleInference
81+
git clone https://github.com/PaddlePaddle/Paddle.git
82+
git clone https://github.com/ronny1996/Paddle-Inference-Demo.git
83+
84+
mkdir -p Paddle/build
85+
pushd Paddle/build
86+
87+
cmake .. -DPY_VERSION=3.7 -DWITH_GPU=OFF -DWITH_TESTING=ON -DCMAKE_BUILD_TYPE=Release -DON_INFER=ON -DWITH_MKL=ON -DWITH_CUSTOM_DEVICE=ON
88+
89+
make -j8
90+
91+
popd
92+
cp -R Paddle/build/paddle_inference_install_dir Paddle-Inference-Demo/c++/lib/paddle_inference
93+
export PADDLE_INFERENCE_LIB_DIR=$(realpath Paddle-Inference-Demo/c++/lib/paddle_inference/paddle/lib)
94+
95+
# Compile the plug-in
96+
mkdir -p PaddleCustomDevice/backends/custom_cpu/build
97+
pushd PaddleCustomDevice/backends/custom_cpu/build
98+
99+
cmake .. -DON_INFER=ON -DPADDLE_INFERENCE_LIB_DIR=${PADDLE_INFERENCE_LIB_DIR}
100+
make -j8
101+
102+
# Specify the plug-in directory
103+
export CUSTOM_DEVICE_ROOT=$PWD
104+
popd
105+
```
106+
107+
Using PaddleInference
108+
109+
```bash
110+
pushd Paddle-Inference-Demo/c++/resnet50
111+
112+
# Modify resnet50_test.cc, use config.EnableCustomDevice("custom_cpu", 0) to replace config.EnableUseGpu(100, 0)
113+
114+
bash run.sh
115+
```
116+
117+
expected similar output
118+
119+
```bash
120+
I0713 09:02:38.808723 24792 resnet50_test.cc:74] run avg time is 297.75 ms
121+
I0713 09:02:38.808859 24792 resnet50_test.cc:89] 0 : 8.76192e-29
122+
I0713 09:02:38.808894 24792 resnet50_test.cc:89] 100 : 8.76192e-29
123+
I0713 09:02:38.808904 24792 resnet50_test.cc:89] 200 : 8.76192e-29
124+
I0713 09:02:38.808912 24792 resnet50_test.cc:89] 300 : 8.76192e-29
125+
I0713 09:02:38.808920 24792 resnet50_test.cc:89] 400 : 8.76192e-29
126+
I0713 09:02:38.808928 24792 resnet50_test.cc:89] 500 : 8.76192e-29
127+
I0713 09:02:38.808936 24792 resnet50_test.cc:89] 600 : 1.05766e-19
128+
I0713 09:02:38.808945 24792 resnet50_test.cc:89] 700 : 2.04093e-23
129+
I0713 09:02:38.808954 24792 resnet50_test.cc:89] 800 : 3.85255e-25
130+
I0713 09:02:38.808961 24792 resnet50_test.cc:89] 900 : 8.76192e-29
131+
```

0 commit comments

Comments
 (0)