Skip to content

Commit 4286b99

Browse files
author
wangzaijun
committed
fix
1 parent 5af52bf commit 4286b99

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# PD decode mode for DeepSeek-R1 (DP+EP) on H200.
#
# Usage:
#   sh pd_decode.sh <host> <pd_master_ip> [extra api_server args...]
#
#   host          host of the current node (passed to --host)
#   pd_master_ip  IP of the PD master (passed to --pd_master_ip)
#
# Any arguments after the first two are forwarded unchanged to
# lightllm.server.api_server. For example, to enable microbatch overlap:
#   sh pd_decode.sh <host> <pd_master_ip> --enable_decode_microbatch_overlap

# Fail early instead of launching the server with empty --host / --pd_master_ip.
if [ "$#" -lt 2 ]; then
  printf 'Usage: sh %s <host> <pd_master_ip> [extra api_server args...]\n' "${0##*/}" >&2
  exit 2
fi

export host="$1"
export pd_master_ip="$2"
shift 2 # what remains in "$@" is forwarded to the api_server below

# UCX device list: every HCA reported by ibv_devinfo except mlx5_8 and mlx5_9,
# each suffixed with port ":1" and joined with commas.
# NOTE(review): mlx5_8/mlx5_9 are presumably not usable for KV transfer on the
# reference machine -- adjust the exclusion for your own topology.
# Assignment and export are split so the value is never word-split by older sh.
UCX_NET_DEVICES=$(
  ibv_devinfo \
    | awk '/hca_id:/ && !/mlx5_8|mlx5_9/ { print $2 ":1" }' \
    | paste -sd, -
)
export UCX_NET_DEVICES
if [ -z "$UCX_NET_DEVICES" ]; then
  printf 'warning: ibv_devinfo reported no usable devices; UCX_NET_DEVICES is empty\n' >&2
fi
export UCX_LOG_LEVEL=info
export UCX_TLS=rc,cuda,gdr_copy

# Start the CUDA MPS control daemon (-d). Deliberately best-effort: a non-zero
# status here does not stop the launch below.
nvidia-cuda-mps-control -d

# Launch the decode node. "$@" expands to nothing when no extra args are given.
MOE_MODE=EP KV_TRANS_USE_P2P=1 LOADWORKER=18 python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "nixl_decode" \
  --tp 8 \
  --dp 8 \
  --host "$host" \
  --port 8121 \
  --nccl_port 12322 \
  --enable_fa3 \
  --pd_master_ip "$pd_master_ip" \
  --pd_master_port 60011 \
  "$@"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# PD prefill mode for DeepSeek-R1 (DP+EP) on H200.
#
# Usage:
#   sh pd_prefill.sh <host> <pd_master_ip> [extra api_server args...]
#
#   host          host of the current node (passed to --host)
#   pd_master_ip  IP of the PD master (passed to --pd_master_ip)
#
# Any arguments after the first two are forwarded unchanged to
# lightllm.server.api_server. For example, to enable microbatch overlap:
#   sh pd_prefill.sh <host> <pd_master_ip> --enable_prefill_microbatch_overlap

# Fail early instead of launching the server with empty --host / --pd_master_ip.
if [ "$#" -lt 2 ]; then
  printf 'Usage: sh %s <host> <pd_master_ip> [extra api_server args...]\n' "${0##*/}" >&2
  exit 2
fi

export host="$1"
export pd_master_ip="$2"
shift 2 # what remains in "$@" is forwarded to the api_server below

### UCX settings used by the nixl PD mode
# UCX device list: every HCA reported by ibv_devinfo except mlx5_8 and mlx5_9,
# each suffixed with port ":1" and joined with commas.
# NOTE(review): mlx5_8/mlx5_9 are presumably not usable for KV transfer on the
# reference machine -- adjust the exclusion for your own topology.
# Assignment and export are split so the value is never word-split by older sh.
UCX_NET_DEVICES=$(
  ibv_devinfo \
    | awk '/hca_id:/ && !/mlx5_8|mlx5_9/ { print $2 ":1" }' \
    | paste -sd, -
)
export UCX_NET_DEVICES
if [ -z "$UCX_NET_DEVICES" ]; then
  printf 'warning: ibv_devinfo reported no usable devices; UCX_NET_DEVICES is empty\n' >&2
fi
export UCX_LOG_LEVEL=info
export UCX_TLS=rc,cuda,gdr_copy

# Start the CUDA MPS control daemon (-d). Deliberately best-effort: a non-zero
# status here does not stop the launch below.
nvidia-cuda-mps-control -d

# Launch the prefill node. "$@" expands to nothing when no extra args are given.
MOE_MODE=EP KV_TRANS_USE_P2P=1 LOADWORKER=18 python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "nixl_prefill" \
  --tp 8 \
  --dp 8 \
  --host "$host" \
  --port 8019 \
  --nccl_port 2732 \
  --enable_fa3 \
  --disable_cudagraph \
  --pd_master_ip "$pd_master_ip" \
  --pd_master_port 60011 \
  "$@"

0 commit comments

Comments
 (0)