
Commit b4fe16c

mgoin authored
Add vllm bench [latency, throughput] CLI commands (#16508)
Signed-off-by: mgoin <[email protected]>
1 parent bc5dd4f commit b4fe16c

File tree

11 files changed: +1771 additions, -2 deletions

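For quick reference, the new offline subcommands can be invoked exactly the way the CI tests in this commit invoke them (the tests below also cover the server-side vllm bench serve). A minimal sketch in Python; the model name and flags are copied from the tests in this commit, and having the vllm CLI installed on PATH is an assumption of this example:

# Sketch: drive the new offline `vllm bench` subcommands the way the CI
# tests in this commit do. Flags mirror those tests; vllm being installed
# on PATH is an assumption of this example.
import subprocess

MODEL = "meta-llama/Llama-3.2-1B-Instruct"

for subcommand in ("latency", "throughput"):
    cmd = [
        "vllm", "bench", subcommand, "--model", MODEL,
        "--input-len", "32", "--output-len", "1",
        "--enforce-eager", "--load-format", "dummy",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    print(subcommand, "exit code:", result.returncode)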

.buildkite/test-pipeline.yaml

Lines changed: 7 additions & 0 deletions

@@ -341,6 +341,13 @@ steps:
   commands:
   - bash scripts/run-benchmarks.sh
 
+- label: Benchmarks CLI Test # 10min
+  source_file_dependencies:
+  - vllm/
+  - tests/benchmarks/
+  commands:
+  - pytest -v -s benchmarks/
+
 - label: Quantization Test # 33min
   source_file_dependencies:
   - csrc/
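This step appears to run from the repository's tests/ directory in CI, hence the bare benchmarks/ path. To reproduce it locally through pytest's Python entry point, a sketch; running from tests/ and having the benchmark marker registered in the project's pytest configuration are assumptions here:

# Sketch: local equivalent of the new CI step. Assumes it is run from the
# repo's tests/ directory and that the `benchmark` marker these tests use
# is registered in the pytest configuration.
import pytest

raise SystemExit(pytest.main(["-v", "-s", "benchmarks/"]))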

tests/benchmarks/__init__.py

Whitespace-only changes.
tests/benchmarks/test_latency_cli.py

Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
import subprocess

import pytest

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.mark.benchmark
def test_bench_latency():
    command = [
        "vllm", "bench", "latency", "--model", MODEL_NAME, "--input-len", "32",
        "--output-len", "1", "--enforce-eager", "--load-format", "dummy"
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print(result.stdout)
    print(result.stderr)

    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"

tests/benchmarks/test_serve_cli.py

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
# SPDX-License-Identifier: Apache-2.0
import subprocess

import pytest

from ..utils import RemoteOpenAIServer

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.fixture(scope="module")
def server():
    args = [
        "--max-model-len", "1024", "--enforce-eager", "--load-format", "dummy"
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.benchmark
def test_bench_serve(server):
    command = [
        "vllm",
        "bench",
        "serve",
        "--model",
        MODEL_NAME,
        "--host",
        server.host,
        "--port",
        str(server.port),
        "--random-input-len",
        "32",
        "--random-output-len",
        "4",
        "--num-prompts",
        "5",
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print(result.stdout)
    print(result.stderr)

    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"
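RemoteOpenAIServer is imported from the repository's shared test utilities; this test relies only on its context-manager lifecycle and its host/port attributes. Purely for orientation, a hypothetical stand-in is sketched below, assuming the helper starts a vllm serve subprocess and polls the server's /health endpoint until it is ready. None of this class is code from the commit:

# Hypothetical stand-in for RemoteOpenAIServer, sketching only the contract
# the fixture relies on: start `vllm serve`, wait for /health, expose
# .host/.port, and tear down on exit. Not code from this commit.
import subprocess
import time
import urllib.error
import urllib.request


class MinimalRemoteServer:

    def __init__(self, model, cli_args, host="127.0.0.1", port=8000):
        self.host, self.port = host, port
        self.proc = subprocess.Popen(
            ["vllm", "serve", model, "--host", host, "--port",
             str(port), *cli_args])

    def __enter__(self):
        url = f"http://{self.host}:{self.port}/health"
        deadline = time.monotonic() + 600
        while time.monotonic() < deadline:
            try:
                with urllib.request.urlopen(url, timeout=5):
                    return self  # server responded: it is ready
            except (urllib.error.URLError, ConnectionError):
                pass  # not up yet, keep polling
            if self.proc.poll() is not None:
                raise RuntimeError("vllm serve exited before becoming healthy")
            time.sleep(1)
        raise TimeoutError("server did not become healthy in time")

    def __exit__(self, *exc):
        self.proc.terminate()
        self.proc.wait()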
tests/benchmarks/test_throughput_cli.py

Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
import subprocess

import pytest

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.mark.benchmark
def test_bench_throughput():
    command = [
        "vllm", "bench", "throughput", "--model", MODEL_NAME, "--input-len",
        "32", "--output-len", "1", "--enforce-eager", "--load-format", "dummy"
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print(result.stdout)
    print(result.stderr)

    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"
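The latency and throughput tests are near-duplicates, differing only in the subcommand name. If that duplication grows, a parametrized variant could collapse them; the sketch below is an alternative shape, not code from this commit:

# Sketch: a parametrized variant covering both offline subcommands.
# An alternative shape for the two tests above, not code from this commit.
import subprocess

import pytest

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.mark.benchmark
@pytest.mark.parametrize("subcommand", ["latency", "throughput"])
def test_bench_offline(subcommand):
    command = [
        "vllm", "bench", subcommand, "--model", MODEL_NAME,
        "--input-len", "32", "--output-len", "1",
        "--enforce-eager", "--load-format", "dummy",
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"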
