Skip to content

Commit e2e76ab

Browse files
committed
add test hpu disagg accuracy script
1 parent b2065a2 commit e2e76ab

File tree

1 file changed

+159
-0
lines changed

1 file changed

+159
-0
lines changed
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
#!/bin/bash
set -xe

# Service endpoints. Every value below keeps whatever the caller exported
# and only falls back to the listed default when unset or empty.
: "${PREFILL_HOST:=localhost}"
: "${PREFILL_PORT:=8100}"
: "${PREFILL_NIXL_SIDE_PORT:=5577}"
: "${DECODE_HOST:=localhost}"
: "${DECODE_PORT:=8200}"
: "${PROXY_HOST:=localhost}"
: "${PROXY_PORT:=8192}"
: "${BASELINE_HOST:=localhost}"
: "${BASELINE_PORT:=9290}"

# Model and serving limits passed to `vllm serve`.
: "${MODEL_NAME:=meta-llama/Llama-3.2-3B-Instruct}"
: "${MAX_MODEL_LEN:=1024}"
: "${BLOCK_SIZE:=32}"

# Execution environment: test assets are located relative to the git
# checkout this script is run from; the conda install is activated on each
# host before a server is started.
GIT_ROOT=$(git rev-parse --show-toplevel)
EXP_ROOT="${GIT_ROOT}/tests/v1/kv_connector/nixl_integration"
: "${CONDA_PATH:=/home/${USER}/anaconda3}"
: "${CONDA_ENV_NAME:=nixl}"

# File handed to test_disagg_accuracy.py via --file_name in both runs.
: "${OUTPUT_FILE:=${EXP_ROOT}/.tpu_accuracy_test_outputs.txt}"
29+
30+
# Trap the SIGINT signal (triggered by Ctrl+C)
31+
trap 'kill $(jobs -pr)' SIGINT SIGTERM EXIT
32+
33+
34+
# Waits for a vLLM server to start.
#
# Polls ${host}:${port}/v1/completions with `curl -s` once per second until
# curl exits successfully or the timeout expires.
# Arguments:
#   $1 - host to poll
#   $2 - port to poll
#   $3 - (optional) overall timeout in seconds; defaults to 1200
# Returns:
#   0 once curl succeeds, 1 if the timeout expires first.
wait_for_server() {
  local host=$1
  local port=$2
  local timeout_s=${3:-1200}

  # host/port are handed to the inner shell as positional parameters instead
  # of being spliced into the script text, so their contents can never alter
  # what the inner shell executes.
  if timeout "${timeout_s}" bash -c '
    until curl -s "$1:$2/v1/completions" > /dev/null; do
      sleep 1
    done' wait_for_server "${host}" "${port}"; then
    return 0
  fi
  return 1
}
43+
44+
# Cleanup function: stop the python processes found by `pgrep python` on
# the machine running this script.
#
# Processes get SIGTERM first so they can shut down on their own; whatever
# is still alive after the grace period gets SIGKILL. When pgrep finds
# nothing, no signal is sent at all.
# Globals:
#   CLEANUP_GRACE_SECONDS (read, optional) - seconds to wait between SIGTERM
#     and SIGKILL; defaults to 5.
# Returns:
#   0 always (cleanup is best-effort).
cleanup() {
  echo "Cleaning up python processes..."
  local -a pids=()
  local grace=${CLEANUP_GRACE_SECONDS:-5}
  local waited=0

  # pgrep exits non-zero when nothing matches; that is the "nothing to do"
  # case here, not a failure.
  mapfile -t pids < <(pgrep python || true)

  if (( ${#pids[@]} > 0 )); then
    kill -TERM "${pids[@]}" 2>/dev/null || true
    # kill -0 only probes for existence; keep waiting while any PID is alive.
    while (( waited < grace )) && kill -0 "${pids[@]}" 2>/dev/null; do
      sleep 1
      waited=$(( waited + 1 ))
    done
    # Harmless for PIDs that are already gone.
    kill -KILL "${pids[@]}" 2>/dev/null || true
  fi
  echo "Cleanup complete."
}
52+
53+
# Start the non-disaggregated baseline `vllm serve` on BASELINE_HOST via ssh.
#
# The ssh session is started in the background and the function returns
# immediately; it does not wait for the server to accept requests.
# Globals:
#   read:    CONDA_PATH, CONDA_ENV_NAME, MODEL_NAME, BASELINE_HOST,
#            BASELINE_PORT, MAX_MODEL_LEN, BLOCK_SIZE
#   written: BASELINE_BASE_CMD - the full remote command line
# Outputs:
#   Echoes the remote command line to stdout.
launch_baseline() {
  # "${array[*]}" joins on the first character of IFS; pin it to a space so
  # every flag and value is separated (the old string literal fused
  # "--max-model-len <n>" with "--seed" whenever the next line was unindented).
  local IFS=' '
  local -a serve_env=(
    VLLM_LOGGING_LEVEL=DEBUG
    VLLM_USE_V1=1
    PJRT_DEVICE=TPU
    VLLM_WORKER_MULTIPROC_METHOD=spawn
    VLLM_ENABLE_V1_MULTIPROCESSING=0
  )
  local -a serve_args=(
    --host "${BASELINE_HOST}"
    --port "${BASELINE_PORT}"
    --max-model-len "${MAX_MODEL_LEN}"
    --seed 42
    --block-size "${BLOCK_SIZE}"
    --gpu-memory-utilization 0.5
    --enforce-eager
  )

  BASELINE_BASE_CMD="source ${CONDA_PATH}/bin/activate ${CONDA_ENV_NAME}; ${serve_env[*]} vllm serve ${MODEL_NAME} ${serve_args[*]}"
  echo "${BASELINE_BASE_CMD}"
  ssh -tt "${BASELINE_HOST}" "${BASELINE_BASE_CMD}" &
}
70+
71+
# Print the remote command line for one `vllm serve` instance of the
# prefill/decode pair (no trailing newline).
# Arguments:
#   $1   - host the server binds to
#   $2   - port the server binds to
#   $3.. - optional extra NAME=VALUE environment assignments
# Globals (read): CONDA_PATH, CONDA_ENV_NAME, MODEL_NAME, MAX_MODEL_LEN,
#                 BLOCK_SIZE
_pd_serve_cmd() {
  local host=$1
  local port=$2
  shift 2
  # "${array[*]}" joins on the first character of IFS; pin it to a space.
  local IFS=' '
  local kv_config='{"kv_connector":"NixlConnector","kv_role":"kv_both","kv_buffer_device":"cpu"}'
  local -a serve_env=(
    UCX_TLS=tcp
    VLLM_MULTIPROC_EXECUTE_MODEL_TIMEOUT_S=200
    VLLM_LOGGING_LEVEL=DEBUG
    VLLM_USE_V1=1
    "$@"
    PJRT_DEVICE=TPU
    VLLM_WORKER_MULTIPROC_METHOD=spawn
    VLLM_ENABLE_V1_MULTIPROCESSING=0
  )
  local -a serve_args=(
    --host "${host}"
    --port "${port}"
    --max-model-len "${MAX_MODEL_LEN}"
    --seed 42
    --block-size "${BLOCK_SIZE}"
    --enforce-eager
    --gpu-memory-utilization 0.5
    # Single quotes keep the JSON intact when the remote shell parses it.
    --kv-transfer-config "'${kv_config}'"
  )
  printf '%s' "source ${CONDA_PATH}/bin/activate ${CONDA_ENV_NAME}; ${serve_env[*]} vllm serve ${MODEL_NAME} ${serve_args[*]}"
}

# Start the prefill and decode `vllm serve` instances over ssh and wait
# until both answer.
#
# Both ssh sessions are left running in the background. Only the prefill
# server gets VLLM_NIXL_SIDE_CHANNEL_HOST/PORT; the decode server is started
# without those overrides.
# Globals:
#   read:    PREFILL_HOST, PREFILL_PORT, PREFILL_NIXL_SIDE_PORT, DECODE_HOST,
#            DECODE_PORT (plus those read by _pd_serve_cmd)
#   written: PREFILL_BASE_CMD, DECODE_BASE_CMD - the remote command lines
# Outputs:
#   Echoes both remote command lines to stdout.
launch_pd() {
  PREFILL_BASE_CMD=$(_pd_serve_cmd "${PREFILL_HOST}" "${PREFILL_PORT}" \
    "VLLM_NIXL_SIDE_CHANNEL_HOST=${PREFILL_HOST}" \
    "VLLM_NIXL_SIDE_CHANNEL_PORT=${PREFILL_NIXL_SIDE_PORT}")
  DECODE_BASE_CMD=$(_pd_serve_cmd "${DECODE_HOST}" "${DECODE_PORT}")

  echo "${PREFILL_BASE_CMD}"
  echo "${DECODE_BASE_CMD}"
  sleep 2

  # execute on hosts
  ssh -tt "${PREFILL_HOST}" "${PREFILL_BASE_CMD}" &
  ssh -tt "${DECODE_HOST}" "${DECODE_BASE_CMD}" &
  sleep 1
  wait_for_server "${PREFILL_HOST}" "${PREFILL_PORT}"
  sleep 1
  wait_for_server "${DECODE_HOST}" "${DECODE_PORT}"
  sleep 1
}
122+
123+
# Start toy_proxy_server.py on PROXY_HOST via ssh, pointing it at the
# prefill and decode servers.
#
# The ssh session is started in the background and the function returns
# immediately. The script path is derived from the local EXP_ROOT.
# NOTE(review): this presumably relies on PROXY_HOST having the checkout at
# the same path - confirm for multi-host runs.
# Globals:
#   read:    CONDA_PATH, CONDA_ENV_NAME, EXP_ROOT, PREFILL_HOST, PREFILL_PORT,
#            DECODE_HOST, DECODE_PORT, PROXY_HOST, PROXY_PORT
#   written: PROXY_BASE_CMD - the full remote command line
# Outputs:
#   Echoes the remote command line to stdout.
launch_pd_proxy() {
  # "${array[*]}" joins on the first character of IFS; pin it to a space.
  local IFS=' '
  local proxy_script
  # %q escapes the path for the remote shell, so a checkout path containing
  # spaces still reaches python3 as a single argument.
  printf -v proxy_script '%q' "${EXP_ROOT}/toy_proxy_server.py"
  local -a proxy_args=(
    --prefiller-host "${PREFILL_HOST}" --prefiller-port "${PREFILL_PORT}"
    --decoder-host "${DECODE_HOST}" --decoder-port "${DECODE_PORT}"
    "--host=${PROXY_HOST}" --port "${PROXY_PORT}"
  )

  PROXY_BASE_CMD="source ${CONDA_PATH}/bin/activate ${CONDA_ENV_NAME}; python3 ${proxy_script} ${proxy_args[*]}"
  echo "${PROXY_BASE_CMD}"
  ssh -tt "${PROXY_HOST}" "${PROXY_BASE_CMD}" &
}
132+
133+
# Run test_disagg_accuracy.py against one service endpoint.
# Arguments:
#   $1 - base URL of the service under test (passed as --service_url)
#   $2 - mode string passed as --mode (this script uses "baseline" and
#        "disagg")
# Globals (read): EXP_ROOT, MODEL_NAME, OUTPUT_FILE
# Returns:
#   The exit status of the python3 invocation.
run_tests() {
  local service_url=$1
  local mode=$2
  # Every argument is quoted so paths or names containing whitespace reach
  # the test script as a single argument each.
  python3 "${EXP_ROOT}/test_disagg_accuracy.py" \
    "--service_url=${service_url}" \
    "--model_name=${MODEL_NAME}" \
    "--mode=${mode}" \
    "--file_name=${OUTPUT_FILE}"
}
138+
139+
140+
# run non-disagg. baseline & save outputs
launch_baseline
sleep 2
wait_for_server "${BASELINE_HOST}" "${BASELINE_PORT}"
run_tests "http://${BASELINE_HOST}:${BASELINE_PORT}" "baseline"
cleanup
# Pause before the next set of servers is launched.
sleep 10

# run disagg. & do exact-match with the outputs from baseline
launch_pd
launch_pd_proxy
# launch_pd_proxy returns right after starting ssh; this fixed delay is the
# only wait before requests are sent to the proxy.
sleep 10
run_tests "http://${PROXY_HOST}:${PROXY_PORT}" "disagg"
echo "-----P/D success----"

# -f: a missing output file must not turn a passed run into a failure
# under `set -e`; --: the path is never parsed as an option.
rm -f -- "${OUTPUT_FILE}"
cleanup

exit 0

0 commit comments

Comments
 (0)