Skip to content

Commit 5df5219

Browse files
[Test] Add Deepseek3.2-exp-w8a8 nightly test
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
1 parent c116524 commit 5df5219

File tree

3 files changed

+176
-0
lines changed

3 files changed

+176
-0
lines changed

.github/workflows/vllm_ascend_test_nightly_a3.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ jobs:
106106
- name: multi-node-qwen3-dp
107107
config_file_path: Qwen3-235B-A3B.yaml
108108
size: 2
109+
- name: multi-node-deepseek3.2-exp-dp
110+
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek3_2-Exp-W8A8.yaml
111+
size: 2
109112
- name: multi-node-dpsk-4node-pd
110113
config_file_path: DeepSeek-R1-W8A8.yaml
111114
size: 4
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Nightly multi-node test configuration for the DeepSeek-V3.2-Exp-W8A8 model
# with disaggregated prefill: 2 nodes, 16 NPUs per node.
test_name: "test DeepSeek-V3.2-Exp-W8A8 disaggregated_prefill"
2+
model: "vllm-ascend/DeepSeek-V3.2-Exp-W8A8"
3+
num_nodes: 2
4+
npu_per_node: 16
5+
# Environment variables shared by the deployments below.
# NOTE(review): presumably exported on every node by the test runner - confirm.
# SERVER_PORT is the value the server commands reference as $SERVER_PORT.
env_common:
6+
VLLM_USE_MODELSCOPE: true
7+
OMP_PROC_BIND: false
8+
OMP_NUM_THREADS: 100
9+
HCCL_BUFFSIZE: 1024
10+
SERVER_PORT: 8080
11+
# Prefill/decode disaggregation: host index 0 is listed as the prefiller and
# host index 1 as the decoder.
disaggregated_prefill:
12+
enabled: true
13+
prefiller_host_index: [0]
14+
decoder_host_index: [1]
15+
16+
# List of server commands to launch.
# NOTE(review): presumably one entry per node, in host-index order - confirm
# against the test runner.
deployment:
17+
# Entry 0: vLLM server configured as the KV producer (kv_role "kv_producer")
# through MooncakeConnector, kv_port 30000, engine_id 0. Runs with data
# parallel size 2 and tensor parallel size 8, matching the "prefill" and
# "decode" sizes declared in kv_connector_extra_config.
-
18+
server_cmd: >
19+
vllm serve "vllm-ascend/DeepSeek-V3.2-Exp-W8A8"
20+
--host 0.0.0.0
21+
--port $SERVER_PORT
22+
--data-parallel-size 2
23+
--data-parallel-size-local 2
24+
--tensor-parallel-size 8
25+
--seed 1024
26+
--enable-expert-parallel
27+
--max-num-seqs 16
28+
--max-model-len 8192
29+
--max-num-batched-tokens 8192
30+
--quantization ascend
31+
--trust-remote-code
32+
--no-enable-prefix-caching
33+
--gpu-memory-utilization 0.9
34+
--kv-transfer-config
35+
'{"kv_connector": "MooncakeConnector",
36+
"kv_role": "kv_producer",
37+
"kv_port": "30000",
38+
"engine_id": "0",
39+
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
40+
"kv_connector_extra_config": {
41+
"prefill": {
42+
"dp_size": 2,
43+
"tp_size": 8
44+
},
45+
"decode": {
46+
"dp_size": 2,
47+
"tp_size": 8
48+
}
49+
}
50+
}'
51+
52+
# Entry 1: vLLM server configured as the KV consumer (kv_role "kv_consumer")
# through MooncakeConnector, kv_port 30200, engine_id 1. Uses the same model,
# parallelism (DP 2 / TP 8) and sequence limits as the producer entry.
-
53+
server_cmd: >
54+
vllm serve "vllm-ascend/DeepSeek-V3.2-Exp-W8A8"
55+
--host 0.0.0.0
56+
--port $SERVER_PORT
57+
--data-parallel-size 2
58+
--data-parallel-size-local 2
59+
--tensor-parallel-size 8
60+
--seed 1024
61+
--quantization ascend
62+
--max-num-seqs 16
63+
--max-model-len 8192
64+
--max-num-batched-tokens 8192
65+
--enable-expert-parallel
66+
--trust-remote-code
67+
--no-enable-prefix-caching
68+
--gpu-memory-utilization 0.9
69+
--kv-transfer-config
70+
'{"kv_connector": "MooncakeConnector",
71+
"kv_role": "kv_consumer",
72+
"kv_port": "30200",
73+
"engine_id": "1",
74+
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
75+
"kv_connector_extra_config": {
76+
"prefill": {
77+
"dp_size": 2,
78+
"tp_size": 8
79+
},
80+
"decode": {
81+
"dp_size": 2,
82+
"tp_size": 8
83+
}
84+
}
85+
}'
86+
# No benchmark entries are defined in this config; the key is left empty.
benchmarks:

tests/e2e/nightly/multi_node/scripts/run.sh

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,90 @@ install_ais_bench() {
107107
cd -
108108
}
109109

110+
#######################################
# Make sure the Go toolchain version named by $GOVER is installed and that
# /usr/local/go/bin is on PATH for this shell and for future shells.
# Globals:
#   GOVER       (read)    wanted Go version, without the "go" prefix
#   GO_VERSION  (written) version token reported by an existing `go`, if any;
#                         left global because code outside this block may
#                         read it
#   YELLOW, NC  (read)    colour escape sequences used in the log lines
#   PATH        (written) /usr/local/go/bin is appended when not yet present
# Calls:
#   download_go - defined outside this block; presumably installs Go $GOVER
#                 under /usr/local/go (confirm against its definition).
# Returns:
#   0 on success, 1 if download_go reports failure.
#######################################
install_go() {
    local go_bin_dir=/usr/local/go/bin
    # Kept literal (single quotes) so ~/.bashrc expands $PATH at login time.
    local bashrc_line='export PATH=$PATH:/usr/local/go/bin'

    # Check if Go is already installed
    if command -v go &> /dev/null; then
        GO_VERSION=$(go version | awk '{print $3}')
        if [[ "$GO_VERSION" == "go$GOVER" ]]; then
            echo -e "${YELLOW}Go $GOVER is already installed. Skipping...${NC}"
        else
            echo -e "${YELLOW}Found Go $GO_VERSION. Will install Go $GOVER...${NC}"
            # Stop here on failure instead of advertising a Go that is not there.
            download_go || return 1
        fi
    else
        download_go || return 1
    fi

    # Add Go to PATH if not already there. A missing ~/.bashrc is treated as
    # "line not present" without leaking grep's error message.
    if ! grep -qF -- "$bashrc_line" ~/.bashrc 2> /dev/null; then
        echo -e "${YELLOW}Adding Go to your PATH in ~/.bashrc${NC}"
        echo "$bashrc_line" >> ~/.bashrc
        echo -e "${YELLOW}Please run 'source ~/.bashrc' or start a new terminal to use Go${NC}"
    fi

    # Append only once so repeated calls do not keep growing PATH.
    # NOTE(review): the directory is appended, so an older `go` found earlier
    # on PATH still takes precedence after a version-mismatch install -
    # confirm whether it should be prepended instead.
    case ":${PATH}:" in
        *":${go_bin_dir}:"*) ;;
        *) export PATH=$PATH:$go_bin_dir ;;
    esac
}
132+
133+
#######################################
# Download one artifact with wget into an explicit destination file.
# Arguments:
#   $1 - label used in the failure message
#   $2 - URL to fetch
#   $3 - destination file name
# Outputs:
#   "Failed to download <label>" on stdout when wget fails
# Returns:
#   0 on success, 1 if wget fails.
#######################################
_fetch_extra_component() {
    local label=$1 url=$2 dest=$3
    # -O pins the local name: no dependence on how wget decodes the URL and no
    # "name.1" copy when a stale file is already present.
    if ! wget -q -O "$dest" "$url"; then
        echo "Failed to download ${label}"
        return 1
    fi
}

#######################################
# Run the download/install sequence for the extra components. Split out from
# install_extra_components so that the caller can clean up the downloaded
# files on every exit path.
# Globals:
#   ASCEND_CUSTOM_OPP_PATH, LD_LIBRARY_PATH, LD_PRELOAD (read and exported)
# Arguments:
#   $1 - file name of the SFA custom-ops .run installer
#   $2 - file name of the custom_ops wheel
#   $3 - file name of the MLAPO custom-ops .run installer
#   $4 - file name of the torch_npu wheel
#   $5 - file name of the libopsproto shared object
# Returns:
#   0 on success, 1 as soon as any step fails.
#######################################
_install_extra_component_steps() {
    local sfa_run=$1 custom_ops_whl=$2 mlapo_run=$3 torch_npu_whl=$4 opsproto_so=$5
    local base_url=https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3
    local cann_dir=/vllm-workspace/CANN
    local toolkit_customize=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize
    local set_env_script=${cann_dir}/vendors/customize/bin/set_env.bash

    mkdir -p "$cann_dir" || return 1

    _fetch_extra_component "$sfa_run" "${base_url}/${sfa_run}" "$sfa_run" || return 1
    chmod +x "./${sfa_run}" || return 1
    if ! "./${sfa_run}" --quiet; then
        echo "Failed to install ${sfa_run}"
        return 1
    fi

    # ${VAR:+:${VAR}} appends the previous value only when it is non-empty, so
    # an unset variable no longer leaves a trailing ':' (an empty
    # LD_LIBRARY_PATH entry means "current directory").
    export ASCEND_CUSTOM_OPP_PATH="${toolkit_customize}${ASCEND_CUSTOM_OPP_PATH:+:${ASCEND_CUSTOM_OPP_PATH}}"
    export LD_LIBRARY_PATH="${toolkit_customize}/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

    _fetch_extra_component "custom_ops wheel" "${base_url}/${custom_ops_whl}" "$custom_ops_whl" || return 1
    if ! pip install "$custom_ops_whl"; then
        echo "Failed to install ${custom_ops_whl}"
        return 1
    fi

    _fetch_extra_component "$mlapo_run" "${base_url}/${mlapo_run}" "$mlapo_run" || return 1
    chmod +x "./${mlapo_run}" || return 1
    if ! "./${mlapo_run}" --quiet --install-path="$cann_dir"; then
        echo "Failed to install ${mlapo_run}"
        return 1
    fi

    # The '+' of the local version tag has to be percent-encoded in the URL.
    _fetch_extra_component "torch_npu wheel" "${base_url}/${torch_npu_whl//+/%2B}" "$torch_npu_whl" || return 1
    if ! pip install "$torch_npu_whl"; then
        echo "Failed to install ${torch_npu_whl}"
        return 1
    fi

    _fetch_extra_component "$opsproto_so" "${base_url}/${opsproto_so}" "$opsproto_so" || return 1
    # NOTE(review): this target is pinned to toolkit 8.2.RC1 while the paths
    # above go through "latest" - confirm both name the same installation.
    if ! cp "$opsproto_so" "/usr/local/Ascend/ascend-toolkit/8.2.RC1/opp/built-in/op_proto/lib/linux/aarch64/${opsproto_so}"; then
        echo "Failed to install ${opsproto_so}"
        return 1
    fi

    if [[ ! -f "$set_env_script" ]]; then
        echo "Missing ${set_env_script}"
        return 1
    fi
    source "$set_env_script"
    export LD_PRELOAD="${cann_dir}/vendors/customize/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so${LD_PRELOAD:+:${LD_PRELOAD}}"

    # Persist the environment for future shells, but only once: the marker
    # comment doubles as the "already added" check.
    if ! grep -qF -- '# Extra components for DeepSeek-R1-W8A8' ~/.bashrc 2> /dev/null; then
        cat >> ~/.bashrc << 'EOF'

# Extra components for DeepSeek-R1-W8A8
export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize${ASCEND_CUSTOM_OPP_PATH:+:${ASCEND_CUSTOM_OPP_PATH}}
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
source /vllm-workspace/CANN/vendors/customize/bin/set_env.bash
export LD_PRELOAD=/vllm-workspace/CANN/vendors/customize/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so${LD_PRELOAD:+:${LD_PRELOAD}}
EOF
    fi
}

#######################################
# Install the extra CANN custom-op packages, wheels and shared object, and
# export the environment they need (current shell and ~/.bashrc).
# Files are downloaded into the current working directory and removed again
# whether or not the installation succeeds.
# Globals:
#   ASCEND_CUSTOM_OPP_PATH, LD_LIBRARY_PATH, LD_PRELOAD (read and exported)
# Outputs:
#   progress and failure messages on stdout
# Returns:
#   0 on success, 1 if any download or install step fails.
#######################################
install_extra_components() {
    echo "====> Installing extra components for DeepSeek-R1-W8A8"

    local -a downloads=(
        CANN-custom_ops-sfa-linux.aarch64.run
        custom_ops-1.0-cp311-cp311-linux_aarch64.whl
        CANN-custom_ops-mlapo-linux.aarch64.run
        torch_npu-2.7.1+gitb7c90d0-cp311-cp311-linux_aarch64.whl
        libopsproto_rt2.0.so
    )
    local status=0

    _install_extra_component_steps "${downloads[@]}" || status=$?

    # Clean up on success and on failure so a retry starts from scratch.
    rm -f -- "${downloads[@]}"

    if (( status != 0 )); then
        return "$status"
    fi

    echo "====> Extra components installation completed"
}
193+
110194
kill_npu_processes() {
111195
pgrep python3 | xargs -r kill -9
112196
pgrep VLLM | xargs -r kill -9
@@ -140,6 +224,9 @@ main() {
140224
checkout_src
141225
install_sys_dependencies
142226
install_vllm
227+
if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-R1-W8A8.yaml" ]]; then
228+
install_extra_components
229+
fi
143230
install_ais_bench
144231
cd "$WORKSPACE/source_code"
145232
. $SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh

0 commit comments

Comments
 (0)