
Commit 0defc81

Add data parallelism scripts for Ascend environment
1 parent d3953d0 commit 0defc81

File tree

9 files changed: +641, -180 lines changed


examples/dev/config.properties

Lines changed: 0 additions & 48 deletions
This file was deleted.
Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
#****************************************
# Multi-node Configuration *
#****************************************
master_ip=192.168.0.205
worker_ip=192.168.0.127
export HCCL_OP_EXPANSION_MODE="AIV"
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=100
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export VLLM_ASCEND_ENABLE_MLAPO=1
export HCCL_INTRA_PCIE_ENABLE=1
export HCCL_INTRA_ROCE_ENABLE=0

#****************************************
# vLLM Configuration *
#****************************************
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export VLLM_LOGGING_LEVEL=INFO
model=/home/models/QwQ-32B
# If not specified, the model name will be the same as the --model argument.
# served_model_name=qwen
server_host=0.0.0.0
server_port=7850
tp_size=4
dp_size=4
dp_size_local=2
dp_rpc_port=13389
pp_size=1
seed=1024
enable_expert_parallel=false
enable_prefix_caching=false
max_model_len=20000
max_num_batch_tokens=20000
max_num_seqs=64
block_size=128
gpu_memory_utilization=0.87
# NONE | PIECEWISE | FULL | FULL_DECODE_ONLY | FULL_AND_PIECEWISE
graph_mode=FULL_DECODE_ONLY
quantization=ascend
async_scheduling=false

#****************************************
# extra vLLM Configuration for Ascend *
#****************************************
# method=deepseek_mtp
enable_ascend_scheduler=false
enable_torchair_graph=false

#****************************************
# UCM Configuration *
#****************************************
# set true to enable UCM
ucm_enable=false
ucm_config_yaml_path=/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml
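
With this configuration, the four data-parallel replicas (dp_size=4) are split across the two nodes at dp_size_local=2 replicas each, and every replica uses tensor parallelism of 4, so each node drives all eight NPUs listed in ASCEND_RT_VISIBLE_DEVICES. The launch script added below derives each node's data-parallel start rank as dp_size_local * NODE, so the intended invocation (a sketch based on the script's own usage message; run each command on the host that owns the corresponding IP) is:

# on the master node (192.168.0.205)
NODE=0 ./run_vllm.sh

# on the worker node (192.168.0.127), started headless with data-parallel start rank 2
NODE=1 ./run_vllm.sh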
Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
#!/bin/bash

if [[ -z "$NODE" ]]; then
    echo "ERROR: Please set NODE=N before running. N should be 0 for the master node and 1,2,3... for workers. Note that the IPs and environment variables in config.properties must be adjusted accordingly."
    echo "Usage: NODE=0 ./run_vllm.sh"
    exit 1
fi

load_config() {
    local config_file
    config_file="$(dirname "${BASH_SOURCE[0]}")/config.properties"
    if [[ ! -f "$config_file" ]]; then
        echo "ERROR: Config file '$config_file' not found!" >&2
        exit 1
    fi

    while IFS= read -r line; do
        line=$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
        [[ -z "$line" || "$line" == \#* ]] && continue

        if [[ "$line" == export\ * ]]; then
            rest="${line#export }"
            eval "export $rest"
        else
            if [[ "$line" == *=* ]]; then
                key="${line%%=*}"
                value="${line#*=}"
                key=$(echo "$key" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
                value=$(echo "$value" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
                eval "$key=\$value"
            else
                echo "WARNING: Invalid config line (no '=' found): $line" >&2
            fi
        fi
    done < "$config_file"
}

ensure_ifconfig_installed() {
    if command -v ifconfig >/dev/null 2>&1; then
        return 0
    fi

    echo "ifconfig not found. Attempting to install net-tools..."

    if command -v apt-get >/dev/null 2>&1; then
        echo "Detected apt-get (Debian/Ubuntu). Installing net-tools..."
        sudo apt-get update && sudo apt-get install -y net-tools
    elif command -v yum >/dev/null 2>&1; then
        echo "Detected yum (RHEL/CentOS). Installing net-tools..."
        sudo yum install -y net-tools
    elif command -v dnf >/dev/null 2>&1; then
        echo "Detected dnf (Fedora). Installing net-tools..."
        sudo dnf install -y net-tools
    else
        echo "ERROR: No supported package manager (apt/yum/dnf) found."
        echo "Please install 'net-tools' manually or use a system with 'ip' command."
        exit 1
    fi

    if ! command -v ifconfig >/dev/null 2>&1; then
        echo "ERROR: Failed to install ifconfig. Please check permissions or network."
        exit 1
    fi

    echo "✅ ifconfig is now available."
}

get_interface_by_ip() {
    local target_ip="$1"
    ifconfig | awk -v target="$target_ip" '
        /^[[:alnum:]]/ {
            iface = $1
            sub(/:$/, "", iface)
        }
        /inet / {
            for (i = 1; i <= NF; i++) {
                gsub(/addr:/, "", $i)
                if ($i == target) {
                    print iface
                    exit
                }
            }
        }
    '
}

start_server() {
    # Ascend environment variables
    if [[ "$NODE" == "0" ]]; then
        export TARGET_IP="$master_ip"
    else
        export TARGET_IP="$worker_ip"
    fi

    # make sure ifconfig is available before detecting the interface
    ensure_ifconfig_installed
    IFACE=$(get_interface_by_ip "$TARGET_IP")

    if [[ -z "$IFACE" ]]; then
        echo "WARNING: Could not find interface with IP $TARGET_IP via ifconfig. Falling back to 'eth0'."
        IFACE="eth0"
    else
        echo "✅ Detected interface: $IFACE (bound to IP $TARGET_IP)"
    fi

    export HCCL_IF_IP="$TARGET_IP"
    export HCCL_SOCKET_IFNAME="$IFACE"
    export GLOO_SOCKET_IFNAME="$IFACE"
    export TP_SOCKET_IFNAME="$IFACE"

    # vLLM parameters
    [[ -z "$model" ]] && { echo "ERROR: model not set in config.properties" >&2; exit 1; }

    if [[ "$ucm_enable" == "true" ]]; then
        [[ -z "$ucm_config_yaml_path" ]] && {
            echo "ERROR: ucm_config_yaml_path not set but ucm_enable=true" >&2
            exit 1
        }
        LOG_FILE="vllm_ucm.log"
    else
        LOG_FILE="vllm.log"
    fi

    echo ""
    echo "===== vllm server configuration ====="
    echo "node = $NODE"
    echo "master_ip = $master_ip"
    echo "local_ip = $TARGET_IP"
    echo "network_interface = $IFACE"
    echo "model = $model"
    echo "served_model_name = ${served_model_name:-<default>}"
    echo "tp_size = $tp_size"
    echo "dp_size = $dp_size"
    echo "pp_size = $pp_size"
    echo "dp_size_local = $dp_size_local"
    echo "dp_start_rank = $((dp_size_local * NODE))"
    echo "dp_address = $master_ip"
    echo "enable_expert_parallel = $enable_expert_parallel"
    echo "max_model_len = $max_model_len"
    echo "max_num_batched_tokens = $max_num_batch_tokens"
    echo "max_num_seqs = $max_num_seqs"
    echo "block_size = $block_size"
    echo "gpu_memory_utilization = $gpu_memory_utilization"
    echo "quantization = $quantization"
    echo "server_host = $server_host"
    echo "server_port = $server_port"
    echo "distributed_backend = $distributed_executor_backend"
    echo "enable_prefix_caching = $enable_prefix_caching"
    echo "async_scheduling = $async_scheduling"
    echo "graph_mode = $graph_mode"
    if [[ "$ucm_enable" == "true" ]]; then
        echo "ucm_config_file = $ucm_config_yaml_path"
    fi
    echo "log_file = $LOG_FILE"
    echo "====================================="
    echo ""

    CMD=(
        vllm serve "$model"
        --max-model-len "$max_model_len"
        --tensor-parallel-size "$tp_size"
        --data-parallel-size "$dp_size"
        --data-parallel-size-local "$dp_size_local"
        --data-parallel-start-rank "$((dp_size_local * NODE))"
        --data-parallel-address "$master_ip"
        --data-parallel-rpc-port "$dp_rpc_port"
        --seed "$seed"
        --pipeline-parallel-size "$pp_size"
        --gpu-memory-utilization "$gpu_memory_utilization"
        --trust-remote-code
        --max-num-batched-tokens "$max_num_batch_tokens"
        --max-num-seqs "$max_num_seqs"
        --block-size "$block_size"
        --host "$server_host"
        --port "$server_port"
    )
    if [[ "$NODE" != "0" ]]; then CMD+=("--headless"); fi

    if [[ "$enable_expert_parallel" == "true" ]]; then CMD+=("--enable-expert-parallel"); fi

    if [[ "$enable_prefix_caching" == "false" ]]; then CMD+=("--no-enable-prefix-caching"); fi

    if [[ "$async_scheduling" == "true" ]]; then CMD+=("--async-scheduling"); fi

    [[ -n "$served_model_name" ]] && CMD+=("--served-model-name" "$served_model_name")

    [[ "$quantization" != "NONE" ]] && CMD+=("--quantization" "$quantization")

    if [[ -n "$graph_mode" ]]; then
        COMPILATION_CONFIG='{"cudagraph_mode": "'"$graph_mode"'"}'
        CMD+=("--compilation-config" "$COMPILATION_CONFIG")
    fi

    if [[ -n "$method" ]]; then
        # speculative decoding configuration
        SPECULATIVE_CONFIG='{"num_speculative_tokens": 1, "method":"'"$method"'"}'
        CMD+=("--speculative-config" "$SPECULATIVE_CONFIG")
    fi

    ADDITIONAL_CONFIG='{"ascend_scheduler_config":{"enabled":'"$enable_ascend_scheduler"'},"torchair_graph_config":{"enabled":'"$enable_torchair_graph"'}}'
    CMD+=("--additional-config" "$ADDITIONAL_CONFIG")

    if [[ "$ucm_enable" == "true" ]]; then
        KV_CONFIG_JSON="{
            \"kv_connector\":\"UCMConnector\",
            \"kv_connector_module_path\":\"ucm.integration.vllm.ucm_connector\",
            \"kv_role\":\"kv_both\",
            \"kv_connector_extra_config\":{\"UCM_CONFIG_FILE\":\"$ucm_config_yaml_path\"}
        }"
        CMD+=("--kv-transfer-config" "$KV_CONFIG_JSON")
    fi

    echo "Executing command: ${CMD[*]}"
    echo ""

    "${CMD[@]}" 2>&1 | tee "$LOG_FILE"
}

load_config
start_server
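
Once both nodes are up, the endpoint at server_host:server_port speaks vLLM's OpenAI-compatible HTTP API. A minimal smoke test against the master node might look like the following (a sketch; the model name assumes the default served name, which falls back to the --model path because served_model_name is left commented out):

curl -s http://192.168.0.205:7850/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "/home/models/QwQ-32B", "prompt": "Hello", "max_tokens": 16}'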
Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
#****************************************
# ray Configuration *
#****************************************
# For multi-node and multi-gpu inference
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES=1

# For multi-node and multi-npu inference
# export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# export RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES=1

master_ip=192.168.0.205
worker_ip=192.168.0.127
# Total number of nodes in multi-node inference
node_num=2

#****************************************
# vLLM Configuration *
#****************************************
# Avoid the error "RuntimeError: CUDASymmetricMemoryAllocator" during multi-node, multi-GPU inference. See the issue: https://github.com/vllm-project/vllm/issues/24694
export VLLM_ALLREDUCE_USE_SYMM_MEM=0
# Run DeepSeek V3.1+ on CUDA
export VLLM_USE_DEEP_GEMM=0
export VLLM_LOGGING_LEVEL=INFO
model=/home/models/QwQ-32B
# If not specified, the model name will be the same as the --model argument.
# served_model_name=qwen
server_host=0.0.0.0
server_port=7850
tp_size=4
dp_size=1
pp_size=1
enable_expert_parallel=false
enable_prefix_caching=false
max_model_len=20000
max_num_batch_tokens=20000
max_num_seqs=64
block_size=128
gpu_memory_utilization=0.87
async_scheduling=false
# NONE | PIECEWISE | FULL | FULL_DECODE_ONLY | FULL_AND_PIECEWISE
graph_mode=FULL_DECODE_ONLY
quantization=NONE
# mp | ray ; set mp to start single-node inference
distributed_executor_backend=ray

#****************************************
# extra vLLM Configuration for Ascend *
#****************************************


#****************************************
# UCM Configuration *
#****************************************
# set true to enable UCM
ucm_enable=true
ucm_config_yaml_path=/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml
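
Because distributed_executor_backend=ray, a Ray cluster spanning node_num=2 nodes has to be running before vllm serve is launched with this configuration. A minimal sketch (the head port 6379 is an assumed default; the IPs mirror the values above):

# on the master node (192.168.0.205)
ray start --head --port=6379

# on the worker node (192.168.0.127)
ray start --address=192.168.0.205:6379

# launch vLLM on the master node only; Ray schedules the remaining workers across the cluster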
