File tree Expand file tree Collapse file tree 2 files changed +52
-0
lines changed
test/start_scripts/single_pd_master Expand file tree Collapse file tree 2 files changed +52
-0
# PD decode mode for DeepSeek-R1 (DP+EP) on H200.
#
# Usage: sh pd_decode.sh <host> <pd_master_ip>
#   host:         the host of the current node
#   pd_master_ip: the ip of the pd master

# Fail early with a usage hint instead of handing empty values to the server.
: "${1:?usage: sh pd_decode.sh <host> <pd_master_ip>}"
: "${2:?usage: sh pd_decode.sh <host> <pd_master_ip>}"
export host="$1"
export pd_master_ip="$2"

# UCX settings. UCX_NET_DEVICES becomes a comma-separated list of
# "<hca_id>:1" entries, one per hca_id line reported by ibv_devinfo,
# skipping mlx5_8 and mlx5_9.
# Assigned separately from `export` so the assignment is not hidden inside
# the export builtin.
UCX_NET_DEVICES=$(
  ibv_devinfo |
    grep 'hca_id:' |
    grep -v -E 'mlx5_8|mlx5_9' |
    awk '{print $2":1"}' |
    paste -sd, -
)
export UCX_NET_DEVICES
if [ -z "$UCX_NET_DEVICES" ]; then
  # Best-effort: keep going, but make the empty device list visible.
  printf '%s\n' "warning: ibv_devinfo reported no usable devices; UCX_NET_DEVICES is empty" >&2
fi
export UCX_LOG_LEVEL=info
export UCX_TLS=rc,cuda,gdr_copy

# Start the CUDA MPS control daemon before launching the server.
nvidia-cuda-mps-control -d

# NOTE(review): /path/DeepSeek-R1 looks like a placeholder; point it at the
# real model directory before running.
MOE_MODE=EP KV_TRANS_USE_P2P=1 LOADWORKER=18 python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "nixl_decode" \
  --tp 8 \
  --dp 8 \
  --host "$host" \
  --port 8121 \
  --nccl_port 12322 \
  --enable_fa3 \
  --pd_master_ip "$pd_master_ip" \
  --pd_master_port 60011
# To enable microbatch overlap, append " \" to the --pd_master_port line
# above and uncomment the following flag:
# --enable_decode_microbatch_overlap
# PD prefill mode for DeepSeek-R1 (DP+EP) on H200.
#
# Usage: sh pd_prefill.sh <host> <pd_master_ip>
#   host:         the host of the current node
#   pd_master_ip: the ip of the pd master

# Fail early with a usage hint instead of handing empty values to the server.
: "${1:?usage: sh pd_prefill.sh <host> <pd_master_ip>}"
: "${2:?usage: sh pd_prefill.sh <host> <pd_master_ip>}"
export host="$1"
export pd_master_ip="$2"

# UCX settings used by the nixl pd mode. UCX_NET_DEVICES becomes a
# comma-separated list of "<hca_id>:1" entries, one per hca_id line reported
# by ibv_devinfo, skipping mlx5_8 and mlx5_9.
# Assigned separately from `export` so the assignment is not hidden inside
# the export builtin.
UCX_NET_DEVICES=$(
  ibv_devinfo |
    grep 'hca_id:' |
    grep -v -E 'mlx5_8|mlx5_9' |
    awk '{print $2":1"}' |
    paste -sd, -
)
export UCX_NET_DEVICES
if [ -z "$UCX_NET_DEVICES" ]; then
  # Best-effort: keep going, but make the empty device list visible.
  printf '%s\n' "warning: ibv_devinfo reported no usable devices; UCX_NET_DEVICES is empty" >&2
fi
export UCX_LOG_LEVEL=info
export UCX_TLS=rc,cuda,gdr_copy

# Start the CUDA MPS control daemon before launching the server.
nvidia-cuda-mps-control -d

# NOTE(review): /path/DeepSeek-R1 looks like a placeholder; point it at the
# real model directory before running.
MOE_MODE=EP KV_TRANS_USE_P2P=1 LOADWORKER=18 python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "nixl_prefill" \
  --tp 8 \
  --dp 8 \
  --host "$host" \
  --port 8019 \
  --nccl_port 2732 \
  --enable_fa3 \
  --disable_cudagraph \
  --pd_master_ip "$pd_master_ip" \
  --pd_master_port 60011
# To enable microbatch overlap, append " \" to the --pd_master_port line
# above and uncomment the following flag:
# --enable_prefill_microbatch_overlap
You can’t perform that action at this time.
0 commit comments