Skip to content

Commit d3953d0

Browse files
committed
add scripts to start ray and vllm
1 parent 1bfb325 commit d3953d0

File tree

3 files changed

+280
-0
lines changed

3 files changed

+280
-0
lines changed

examples/dev/config.properties

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
#*****************************
#     ray Configuration      *
#*****************************
CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7
ASCEND_RT_VISIBLE_DEVICES=1,2,3,4,5,6,7
# For multi-node and multi-gpu inference
RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES=1
MASTER_IP=192.168.0.205
WORKER_IP=192.168.0.127
# Total number of nodes in multi-node inference
NODE_NUM=2


#*****************************
#    vLLM Configuration      *
#*****************************
# Avoid the error "RuntimeError: CUDASymmetricMemoryAllocator" during multi-node,
# multi-GPU inference. See it in the issue: https://github.com/vllm-project/vllm/issues/24694
VLLM_ALLREDUCE_USE_SYMM_MEM=0
VLLM_LOGGING_LEVEL=INFO
MODEL=/home/models/QwQ-32B
# If not specified, the model name will be the same as the --model argument.
# SERVED_MODEL_NAME=qwen
TP_SIZE=8
DP_SIZE=1
PP_SIZE=1
# 0 | 1 ; Set 1 to enable expert parallel
ENABLE_EXPERT_PARALLEL=0
MAX_MODEL_LEN=20000
MAX_NUM_BATCH_TOKENS=20000
MAX_NUM_SEQS=64
BLOCK_SIZE=128
GPU_MEMORY_UTILIZATION=0.87
SERVER_HOST=0.0.0.0
SERVER_PORT=7850
# 0 | 1 ; Set 1 to enable prefix caching
ENABLE_PREFIX_CACHING=0
# 0 | 1 ; Set 1 to enable async scheduling
ASYNC_SCHEDULING=0
# NONE | PIECEWISE | FULL | FULL_DECODE_ONLY | FULL_AND_PIECEWISE
GRAPH_MODE=FULL_DECODE_ONLY
QUANTIZATION=None
# mp | ray ; Set mp to start single-node inference
DISTRIBUTED_EXECUTOR_BACKEND=mp


#*****************************
#     UCM Configuration      *
#*****************************
# 0 | 1 ; Set 1 to enable Unified Cache Management
UCM_ENABLE=1
UCM_CONFIG_YAML_PATH=/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml

examples/dev/run_vllm.sh

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
#!/bin/bash

# Read KEY=VALUE pairs from config.properties (located next to this script)
# and export each one into the environment. Blank lines and '#' comments are
# skipped. Exits the whole script if the config file is missing.
load_config() {
  local config_file key value
  config_file="$(dirname "${BASH_SOURCE[0]}")/config.properties"
  if [[ ! -f "$config_file" ]]; then
    echo "ERROR: Config file '$config_file' not found!" >&2
    exit 1
  fi

  # '|| [[ -n "$key" ]]' keeps the last entry even when the file has no
  # trailing newline. Reading the file directly (no grep pre-filter) avoids
  # the non-portable GNU '\s' BRE extension; the loop skips comments/blanks.
  while IFS='=' read -r key value || [[ -n "$key" ]]; do
    # Trim surrounding whitespace with parameter expansion instead of two
    # sed forks per line.
    key="${key#"${key%%[![:space:]]*}"}"
    key="${key%"${key##*[![:space:]]}"}"
    value="${value#"${value%%[![:space:]]*}"}"
    value="${value%"${value##*[![:space:]]}"}"

    # Ignore blank lines and comment lines.
    [[ -z "$key" || "$key" == \#* ]] && continue

    export "$key"="$value"
  done < "$config_file"
}
# Validate required config, assemble the `vllm serve` command line from the
# environment variables exported by load_config, print a summary, then run
# the server with output tee'd to a log file.
#
# Reads: MODEL (required), UCM_ENABLE, UCM_CONFIG_YAML_PATH (required when
#   UCM_ENABLE=1), TP_SIZE, DP_SIZE, PP_SIZE, ENABLE_EXPERT_PARALLEL,
#   MAX_MODEL_LEN, MAX_NUM_BATCH_TOKENS, MAX_NUM_SEQS, BLOCK_SIZE,
#   GPU_MEMORY_UTILIZATION, QUANTIZATION, SERVER_HOST, SERVER_PORT,
#   DISTRIBUTED_EXECUTOR_BACKEND, ENABLE_PREFIX_CACHING, ASYNC_SCHEDULING,
#   GRAPH_MODE, SERVED_MODEL_NAME (optional).
# Writes: LOG_FILE (global), the log file itself.
# Returns: the exit status of `vllm serve` (not of tee).
start_server() {
  [[ -z "$MODEL" ]] && { echo "ERROR: MODEL not set in config.properties" >&2; exit 1; }

  if [[ "$UCM_ENABLE" == "1" ]]; then
    [[ -z "$UCM_CONFIG_YAML_PATH" ]] && {
      echo "ERROR: UCM_CONFIG_YAML_PATH not set but UCM_ENABLE=1" >&2
      exit 1
    }
    LOG_FILE="vllm_ucm.log"
  else
    LOG_FILE="vllm.log"
  fi

  echo ""
  echo "===== vLLM Server Configuration ====="
  echo "MODEL = $MODEL"
  echo "SERVED_MODEL_NAME = ${SERVED_MODEL_NAME:-<default>}"
  echo "TP_SIZE = $TP_SIZE"
  echo "DP_SIZE = $DP_SIZE"
  echo "PP_SIZE = $PP_SIZE"
  echo "ENABLE_EXPERT_PARALLEL = $ENABLE_EXPERT_PARALLEL"
  echo "MAX_MODEL_LEN = $MAX_MODEL_LEN"
  echo "MAX_NUM_BATCHED_TOKENS = $MAX_NUM_BATCH_TOKENS"
  echo "MAX_NUM_SEQS = $MAX_NUM_SEQS"
  echo "BLOCK_SIZE = $BLOCK_SIZE"
  echo "GPU_MEMORY_UTILIZATION = $GPU_MEMORY_UTILIZATION"
  echo "QUANTIZATION = $QUANTIZATION"
  echo "SERVER_HOST = $SERVER_HOST"
  echo "SERVER_PORT = $SERVER_PORT"
  echo "DISTRIBUTED_BACKEND = $DISTRIBUTED_EXECUTOR_BACKEND"
  echo "ENABLE_PREFIX_CACHING = $ENABLE_PREFIX_CACHING"
  echo "ASYNC_SCHEDULING = $ASYNC_SCHEDULING"
  echo "GRAPH_MODE = $GRAPH_MODE"
  if [[ "$UCM_ENABLE" == "1" ]]; then
    echo "UCM_CONFIG_FILE = $UCM_CONFIG_YAML_PATH"
  fi
  echo "LOG_FILE = $LOG_FILE"
  echo "====================================="
  echo ""

  # Build the command as an array so values with spaces survive intact.
  CMD=(
    vllm serve "$MODEL"
    --max-model-len "$MAX_MODEL_LEN"
    --tensor-parallel-size "$TP_SIZE"
    --data-parallel-size "$DP_SIZE"
    --pipeline-parallel-size "$PP_SIZE"
    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
    --trust-remote-code
    --max-num-batched-tokens "$MAX_NUM_BATCH_TOKENS"
    --max-num-seqs "$MAX_NUM_SEQS"
    --block-size "$BLOCK_SIZE"
    --host "$SERVER_HOST"
    --port "$SERVER_PORT"
    --distributed-executor-backend "$DISTRIBUTED_EXECUTOR_BACKEND"
  )

  if [[ "$ENABLE_EXPERT_PARALLEL" == "1" ]]; then CMD+=("--enable-expert-parallel"); fi

  # Prefix caching is on by default in vLLM; 0 means explicitly disable it.
  if [[ "$ENABLE_PREFIX_CACHING" == "0" ]]; then CMD+=("--no-enable-prefix-caching"); fi

  if [[ "$ASYNC_SCHEDULING" == "1" ]]; then CMD+=("--async-scheduling"); fi

  [[ -n "$SERVED_MODEL_NAME" ]] && CMD+=("--served-model-name" "$SERVED_MODEL_NAME")

  # The literal string "None" (the config default) means no quantization flag.
  [[ "$QUANTIZATION" != "None" ]] && CMD+=("--quantization" "$QUANTIZATION")

  if [[ "$UCM_ENABLE" == "1" ]]; then
    # NOTE(review): JSON is built by string interpolation — a path containing
    # a double quote or backslash would produce invalid JSON. Fine for the
    # sane paths used here.
    KV_CONFIG_JSON="{
      \"kv_connector\":\"UCMConnector\",
      \"kv_connector_module_path\":\"ucm.integration.vllm.ucm_connector\",
      \"kv_role\":\"kv_both\",
      \"kv_connector_extra_config\":{\"UCM_CONFIG_FILE\":\"$UCM_CONFIG_YAML_PATH\"}
    }"
    CMD+=("--kv-transfer-config" "$KV_CONFIG_JSON")
  fi

  if [[ -n "$GRAPH_MODE" ]]; then
    COMPILATION_CONFIG='{"cudagraph_mode":"'"$GRAPH_MODE"'"}'
    CMD+=("--compilation-config" "$COMPILATION_CONFIG")
  fi

  echo "Executing command: ${CMD[*]}"
  echo ""

  "${CMD[@]}" 2>&1 | tee "$LOG_FILE"
  # Without this, the function's status would be tee's (almost always 0),
  # hiding vllm failures from the caller.
  return "${PIPESTATUS[0]}"
}
# Entry point: pull configuration into the environment, then launch vLLM.
load_config
start_server

examples/dev/start_ray.sh

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
#!/bin/bash

# Each machine runs this script with NODE identifying its role:
#   NODE=0       -> ray head node
#   NODE=1,2,... -> ray worker nodes
# Refuse to run when NODE is unset so the role is always explicit.
if [[ -z "$NODE" ]]; then
  echo "ERROR: Please set NODE=N before running. N should be 0 for head node; 1,2,3... for workers. Note the IPs and environment variables in the script should be modified accordingly. "
  echo "Usage: NODE=0 ./start_ray.sh"
  exit 1
fi
# Read KEY=VALUE pairs from config.properties (located next to this script)
# and export each one into the environment. Blank lines and '#' comments are
# skipped. Exits the whole script if the config file is missing.
load_config() {
  local config_file key value
  config_file="$(dirname "${BASH_SOURCE[0]}")/config.properties"
  if [[ ! -f "$config_file" ]]; then
    # Diagnostics go to stderr, matching run_vllm.sh's load_config.
    echo "ERROR: Config file '$config_file' not found!" >&2
    exit 1
  fi

  # '|| [[ -n "$key" ]]' keeps the last entry even when the file has no
  # trailing newline. Reading the file directly (no grep pre-filter) avoids
  # the non-portable GNU '\s' BRE extension; the loop skips comments/blanks.
  while IFS='=' read -r key value || [[ -n "$key" ]]; do
    # Trim surrounding whitespace with parameter expansion instead of two
    # sed forks per line.
    key="${key#"${key%%[![:space:]]*}"}"
    key="${key%"${key##*[![:space:]]}"}"
    value="${value#"${value%%[![:space:]]*}"}"
    value="${value%"${value##*[![:space:]]}"}"

    # Ignore blank lines and comment lines.
    if [[ -z "$key" ]] || [[ "$key" == \#* ]]; then
      continue
    fi

    export "$key"="$value"
  done < "$config_file"
}
# Ensure the legacy `ifconfig` tool is available, installing net-tools via
# whichever package manager is present when it is not. Exits the script if
# no supported package manager exists or the install fails.
#
# NOTE(review): this helper is defined but never invoked in this script —
# consider calling it before get_interface_by_ip, or removing it.
ensure_ifconfig_installed() {
  # Nothing to do when the tool is already on PATH.
  command -v ifconfig >/dev/null 2>&1 && return 0

  echo "ifconfig not found. Attempting to install net-tools..."

  # Order matters for the message printed: apt-get, then yum, then dnf.
  if command -v apt-get >/dev/null 2>&1; then
    echo "Detected apt-get (Debian/Ubuntu). Installing net-tools..."
    sudo apt-get update && sudo apt-get install -y net-tools
  elif command -v yum >/dev/null 2>&1; then
    echo "Detected yum (RHEL/CentOS). Installing net-tools..."
    sudo yum install -y net-tools
  elif command -v dnf >/dev/null 2>&1; then
    echo "Detected dnf (Fedora). Installing net-tools..."
    sudo dnf install -y net-tools
  else
    echo "ERROR: No supported package manager (apt/yum/dnf) found."
    echo "Please install 'net-tools' manually or use a system with 'ip' command."
    exit 1
  fi

  # Verify the install actually produced a usable binary.
  if ! command -v ifconfig >/dev/null 2>&1; then
    echo "ERROR: Failed to install ifconfig. Please check permissions or network."
    exit 1
  fi

  echo "✅ ifconfig is now available."
}
# Print the name of the network interface bound to the given IPv4 address,
# as reported by ifconfig; prints nothing when no interface matches.
# Arguments: $1 - IPv4 address to look up.
get_interface_by_ip() {
  local wanted_ip="$1"
  # The awk program remembers the most recent interface header line, then
  # scans each 'inet' line for the target address (stripping the 'addr:'
  # prefix used by older ifconfig output formats).
  ifconfig | awk -v target="$wanted_ip" '
    /^[[:alnum:]]/ {
        iface = $1
        sub(/:$/, "", iface)
    }
    /inet / {
        for (i = 1; i <= NF; i++) {
            gsub(/addr:/, "", $i)
            if ($i == target) {
                print iface
                exit
            }
        }
    }
  '
}
# Derive per-node networking and GPU settings from NODE and the loaded config.
#
# Selects this node's IP (MASTER_IP for node 0, WORKER_IP otherwise), resolves
# the matching network interface, and exports the interface-binding variables
# consumed by HCCL/NCCL/GLOO plus NUM_GPUS (GPUs this node contributes).
#
# NOTE(review): every worker reuses the single WORKER_IP, so as written this
# only supports NODE_NUM=2 — add per-worker IPs to scale further.
set_node_env(){
  if [[ "$NODE" == "0" ]]; then
    export TARGET_IP="$MASTER_IP"
  else
    export TARGET_IP="$WORKER_IP"
  fi

  IFACE=$(get_interface_by_ip "$TARGET_IP")

  if [[ -z "$IFACE" ]]; then
    echo "WARNING: Could not find interface with IP $TARGET_IP via ifconfig. Falling back to 'eth0'."
    IFACE="eth0"
  else
    echo "✅ Detected interface: $IFACE (bound to IP $TARGET_IP)"
  fi

  export HCCL_IF_IP="$TARGET_IP"
  export NCCL_SOCKET_IFNAME="$IFACE"
  export GLOO_SOCKET_IFNAME="$IFACE"
  export TP_SOCKET_IFNAME="$IFACE"

  # Fail fast with a clear message instead of bash's cryptic arithmetic
  # error when TP_SIZE/NODE_NUM are missing or NODE_NUM is zero.
  if [[ ! "$NODE_NUM" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: NODE_NUM must be a positive integer (got '$NODE_NUM')" >&2
    exit 1
  fi
  if [[ ! "$TP_SIZE" =~ ^[0-9]+$ ]]; then
    echo "ERROR: TP_SIZE must be an integer (got '$TP_SIZE')" >&2
    exit 1
  fi
  # Integer division: TP_SIZE is expected to be a multiple of NODE_NUM.
  export NUM_GPUS=$(( TP_SIZE / NODE_NUM ))

  echo ""
  echo "===== Ray Startup Configuration ======"
  echo "NODE = $NODE"
  echo "LOCAL_IP = $TARGET_IP"
  if [[ "$NODE" != "0" ]]; then
    echo "MASTER_IP = $MASTER_IP"
  fi
  echo "NETWORK_INTERFACE = $IFACE"
  echo "NUM_GPUS (per node) = $NUM_GPUS"
  echo "CUDA_VISIBLE_DEVICES = $CUDA_VISIBLE_DEVICES"
  echo "ASCEND_RT_VISIBLE_DEVICES= $ASCEND_RT_VISIBLE_DEVICES"
  echo "======================================"
  echo ""
}
# Entry point: load config, compute node-specific settings, then launch ray.
load_config
set_node_env

# Node 0 hosts the ray head (GCS on port 6379); all other nodes join it.
if [[ "$NODE" == "0" ]]; then
  echo "Starting Ray head node on NODE 0, MASTER_IP: $TARGET_IP"
  # Quote expansions (SC2086) so empty/odd values fail loudly in ray's own
  # argument parsing rather than silently dropping the flag.
  ray start --head --num-gpus="$NUM_GPUS" --node-ip-address="$TARGET_IP" --port=6379
else
  echo "Starting Ray worker node on NODE $NODE, WORKER_IP=$TARGET_IP, connecting to master at $MASTER_IP"
  ray start --address="$MASTER_IP:6379" --num-gpus="$NUM_GPUS" --node-ip-address="$TARGET_IP"
fi

0 commit comments

Comments
 (0)