@@ -30,13 +30,12 @@ Suitable for deploying DeepSeek-R1 model on a single H200 node.
 .. code-block:: bash
 
     # H200 Single node DeepSeek-R1 TP Mode
-    LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
+    python -m lightllm.server.api_server --port 8088 \
         --model_dir /path/DeepSeek-R1 \
         --tp 8 \
         --enable_fa3
 
 **Parameter Description:**
-- `LOADWORKER=18`: Model loading thread count, improves loading speed
 - `--tp 8`: Tensor parallelism, using 8 GPUs
 - `--enable_fa3`: Enable Flash Attention 3.0
 - `--port 8088`: Service port
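
Once the server reports ready, a quick smoke test confirms it is serving. A minimal sketch, assuming the `/generate` endpoint and request schema from the LightLLM README and the port chosen above:

.. code-block:: bash

    # Send a small generation request to the server started above.
    # Endpoint and JSON schema follow the LightLLM README; adjust if
    # your version differs.
    curl http://127.0.0.1:8088/generate \
         -H 'Content-Type: application/json' \
         -d '{"inputs": "What is AI?", "parameters": {"max_new_tokens": 32}}'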
@@ -51,7 +50,7 @@ Suitable for expert parallelism deployment of MoE models like DeepSeek-V2/V3.
 .. code-block:: bash
 
     # H200 Single node DeepSeek-R1 DP + EP Mode
-    MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
+    MOE_MODE=EP python -m lightllm.server.api_server --port 8088 \
         --model_dir /path/DeepSeek-R1 \
         --tp 8 \
         --dp 8 \
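
After launch, it is worth verifying that all eight GPUs are actually occupied. A quick check with standard `nvidia-smi` (nothing LightLLM-specific):

.. code-block:: bash

    # Per-GPU utilization and memory; with --tp 8 --dp 8 all eight
    # devices should show substantial memory use once weights are loaded.
    nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv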
@@ -82,7 +81,7 @@ Suitable for deployment across multiple H200/H100 nodes.
     # H200/H100 Multi-node DeepSeek-R1 TP Mode Node 0
     # Usage: sh multi_node_tp_node0.sh <nccl_host>
     export nccl_host=$1
-    LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
+    python -m lightllm.server.api_server --port 8088 \
         --model_dir /path/DeepSeek-R1 \
         --tp 16 \
         --enable_fa3 \
@@ -98,7 +97,7 @@ Suitable for deployment across multiple H200/H100 nodes.
     # H200/H100 Multi-node DeepSeek-R1 TP Mode Node 1
     # Usage: sh multi_node_tp_node1.sh <nccl_host>
     export nccl_host=$1
-    LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
+    python -m lightllm.server.api_server --port 8088 \
         --model_dir /path/DeepSeek-R1 \
         --tp 16 \
         --enable_fa3 \
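
Both node scripts must be launched together and receive the same `<nccl_host>` (normally a reachable IP of node 0). A minimal orchestration sketch, assuming hypothetical hostnames `node0`/`node1` with passwordless SSH and the scripts present at the same path on both machines:

.. code-block:: bash

    # Hypothetical address; node 0's IP doubles as the NCCL host.
    NODE0_IP=192.168.0.10
    ssh node0 "cd /path/to/scripts && sh multi_node_tp_node0.sh $NODE0_IP" &
    ssh node1 "cd /path/to/scripts && sh multi_node_tp_node1.sh $NODE0_IP" &
    wait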
@@ -125,7 +124,7 @@ Suitable for deploying MoE models across multiple nodes.
     # H200 Multi-node DeepSeek-R1 EP Mode Node 0
     # Usage: sh multi_node_ep_node0.sh <nccl_host>
     export nccl_host=$1
-    MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
+    MOE_MODE=EP python -m lightllm.server.api_server --port 8088 \
         --model_dir /path/DeepSeek-R1 \
         --tp 16 \
         --dp 16 \
@@ -142,7 +141,7 @@ Suitable for deploying MoE models across multiple nodes.
     # H200 Multi-node DeepSeek-R1 EP Mode Node 1
     # Usage: sh multi_node_ep_node1.sh <nccl_host>
     export nccl_host=$1
-    MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
+    MOE_MODE=EP python -m lightllm.server.api_server --port 8088 \
         --model_dir /path/DeepSeek-R1 \
         --tp 16 \
         --dp 16 \
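
Loading a model of this size across two nodes can take several minutes, so it helps to poll the service before sending traffic. A minimal sketch, assuming the server is reachable on node 0 at port 8088; the `/health` endpoint is an assumption here, and any small generation request works equally well:

.. code-block:: bash

    # Hypothetical node 0 address; poll until the server answers.
    NODE0_IP=192.168.0.10
    until curl -sf "http://${NODE0_IP}:8088/health" > /dev/null; do
        echo "waiting for model load..."
        sleep 10
    done
    echo "server is up"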
@@ -187,7 +186,7 @@ PD (Prefill-Decode) disaggregation mode separates prefill and decode stages for
     export host=$1
     export pd_master_ip=$2
     nvidia-cuda-mps-control -d
-    MOE_MODE=EP KV_TRANS_USE_P2P=1 LOADWORKER=18 python -m lightllm.server.api_server \
+    MOE_MODE=EP KV_TRANS_USE_P2P=1 python -m lightllm.server.api_server \
         --model_dir /path/DeepSeek-R1 \
         --run_mode "prefill" \
         --tp 8 \
@@ -197,7 +196,7 @@ PD (Prefill-Decode) disaggregation mode separates prefill and decode stages for
         --nccl_port 2732 \
         --enable_fa3 \
         --disable_cudagraph \
-        --pd_master_ip $pd_master_ip
+        --pd_master_ip $pd_master_ip
 
 **Step 3: Launch Decode Service**
 
@@ -208,7 +207,7 @@ PD (Prefill-Decode) disaggregation mode separates prefill and decode stages for
     export host=$1
     export pd_master_ip=$2
     nvidia-cuda-mps-control -d
-    MOE_MODE=EP KV_TRANS_USE_P2P=1 LOADWORKER=18 python -m lightllm.server.api_server \
+    MOE_MODE=EP KV_TRANS_USE_P2P=1 python -m lightllm.server.api_server \
         --model_dir /path/DeepSeek-R1 \
         --run_mode "decode" \
         --tp 8 \
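
`KV_TRANS_USE_P2P=1` in both scripts requests peer-to-peer GPU copies for KV cache transfer. Whether the node's GPUs actually have P2P links can be inspected up front with standard tooling:

.. code-block:: bash

    # Print the GPU interconnect matrix; NV# entries mark NVLink paths
    # that support peer-to-peer transfers between device pairs.
    nvidia-smi topo -m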
@@ -276,7 +275,7 @@ Supports multiple PD Master nodes, providing better load balancing and high avai
     export host=$1
     export config_server_host=$2
     nvidia-cuda-mps-control -d
-    MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server \
+    MOE_MODE=EP python -m lightllm.server.api_server \
         --model_dir /path/DeepSeek-R1 \
         --run_mode "prefill" \
         --host $host \
@@ -295,7 +294,7 @@ Supports multiple PD Master nodes, providing better load balancing and high avai
     export host=$1
     export config_server_host=$2
     nvidia-cuda-mps-control -d
-    MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server \
+    MOE_MODE=EP python -m lightllm.server.api_server \
         --model_dir /path/DeepSeek-R1 \
         --run_mode "decode" \
         --host $host \
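
Each of the PD scripts above starts the CUDA MPS daemon with `nvidia-cuda-mps-control -d`. When tearing a node down, stop the daemon through the same control interface (standard MPS usage, not LightLLM-specific):

.. code-block:: bash

    # Shut down the MPS control daemon started by the launch scripts.
    echo quit | nvidia-cuda-mps-control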