
Commit 1c3091c

xinhe-nv and LarryXFly authored
tests: [TRTQA-2906] add benchmark serving tests (NVIDIA#4901)
Signed-off-by: xinhe-nv <[email protected]> Co-authored-by: Larry <[email protected]>
1 parent ddbaa5e commit 1c3091c

File tree

3 files changed: 75 additions, 0 deletions

tests/integration/defs/test_e2e.py

Lines changed: 14 additions & 0 deletions
@@ -1433,6 +1433,20 @@ def test_openai_multinodes_chat_tp8pp2(llm_root, llm_venv):
     ])


+@pytest.mark.skip_less_device_memory(80000)
+def test_trtllm_benchmark_serving(llm_root, llm_venv):
+    example_root = Path(os.path.join(llm_root, "examples", "apps"))
+    test_root = unittest_path() / "llmapi" / "apps"
+    llm_venv.run_cmd([
+        "-m", "pip", "install", "-r",
+        os.path.join(example_root, "requirements.txt")
+    ])
+
+    llm_venv.run_cmd(
+        ["-m", "pytest",
+         str(test_root / "_test_trtllm_serve_benchmark.py")])
+
+
 def test_build_time_benchmark_sanity(llm_root, llm_venv):
     temp = tempfile.TemporaryDirectory()
     llm_venv.run_cmd([
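For context, the new wrapper test performs two steps: it installs the example-app requirements into the test virtualenv, then runs the standalone serving-benchmark test under pytest. A minimal standalone sketch of those same two steps, assuming LLM_ROOT points at a TensorRT-LLM checkout and using the current interpreter in place of the llm_venv fixture (the paths below are illustrative, not taken from this commit):

import os
import subprocess
import sys

# Assumed layout: LLM_ROOT is the repository root; adjust paths as needed.
llm_root = os.environ.get("LLM_ROOT", ".")
example_root = os.path.join(llm_root, "examples", "apps")
benchmark_test = os.path.join(llm_root, "tests", "unittest", "llmapi", "apps",
                              "_test_trtllm_serve_benchmark.py")

# Step 1: install the example-app requirements (mirrors the first run_cmd call).
subprocess.run([sys.executable, "-m", "pip", "install", "-r",
                os.path.join(example_root, "requirements.txt")], check=True)

# Step 2: run the standalone benchmark test (mirrors the second run_cmd call).
subprocess.run([sys.executable, "-m", "pytest", benchmark_test], check=True)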

tests/integration/test_lists/qa/examples_test_list.txt

Lines changed: 1 addition & 0 deletions
@@ -490,6 +490,7 @@ test_e2e.py::test_mistral_e2e[use_py_session---]
 test_e2e.py::test_qwen_e2e_cpprunner_large_new_tokens[DeepSeek-R1-Distill-Qwen-1.5B-DeepSeek-R1-Distill-Qwen-1.5B]
 test_e2e.py::test_openai_multi_chat_example
 test_e2e.py::test_openai_consistent_chat
+test_e2e.py::test_trtllm_benchmark_serving
 llmapi/test_llm_examples.py::test_llmapi_server_example
 # Pivot to Pytorch test cases.
 test_e2e.py::test_ptp_quickstart
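The added entry registers the new wrapper test with the QA example list; entries in this file use pytest node-id syntax. A minimal sketch of selecting that node id directly, assuming the integration tests are run from tests/integration/defs (the working directory is an assumption, not stated in the diff):

import pytest

# Hypothetical direct invocation; in CI the QA list is consumed by the test
# harness rather than passed to pytest.main() like this.
exit_code = pytest.main(["test_e2e.py::test_trtllm_benchmark_serving"])
raise SystemExit(exit_code)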
tests/unittest/llmapi/apps/_test_trtllm_serve_benchmark.py (new file)

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+import os
+import subprocess
+import sys
+
+import pytest
+from utils.util import skip_gpu_memory_less_than_80gb
+
+from .openai_server import RemoteOpenAIServer
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from test_llm import get_model_path
+
+
+@pytest.fixture(scope="module")
+def model_name():
+    return "llama-3.1-model/Meta-Llama-3.1-8B"
+
+
+@pytest.fixture(scope="module")
+def model_path(model_name: str):
+    return get_model_path(model_name)
+
+
+@pytest.fixture(scope="module")
+def server(model_path: str):
+    # fix port to facilitate concise trtllm-serve examples
+    with RemoteOpenAIServer(model_path, port=8000) as remote_server:
+        yield remote_server
+
+
+@pytest.fixture(scope="module")
+def benchmark_root():
+    llm_root = os.getenv("LLM_ROOT")
+    return os.path.join(llm_root, "tensorrt_llm", "serve", "scripts")
+
+
+def dataset_path(dataset_name: str):
+    if dataset_name == "sharegpt":
+        return get_model_path(
+            "datasets/ShareGPT_V3_unfiltered_cleaned_split.json")
+    else:
+        raise ValueError(f"Invalid dataset name: {dataset_name}")
+
+
+@skip_gpu_memory_less_than_80gb
+def test_trtllm_serve_benchmark(server: RemoteOpenAIServer, benchmark_root: str,
+                                model_path: str):
+    client_script = os.path.join(benchmark_root, "benchmark_serving.py")
+    dataset = dataset_path("sharegpt")
+    benchmark_cmd = [
+        "python3", client_script, "--dataset-name", "sharegpt", "--model",
+        "llama", "--dataset-path", dataset, "--tokenizer", model_path
+    ]
+
+    # CalledProcessError will be raised if any errors occur
+    subprocess.run(benchmark_cmd,
+                   stdout=subprocess.PIPE,
+                   stderr=subprocess.PIPE,
+                   text=True,
+                   check=True)
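For reference, the subprocess call in the test is equivalent to running the serving-benchmark client against the OpenAI-compatible server that the module fixture starts on port 8000. A minimal standalone sketch, assuming a trtllm-serve endpoint is already listening and that the dataset and tokenizer paths below are local placeholders you substitute yourself:

import os
import subprocess

# Assumption: LLM_ROOT points at the repository root containing the script.
llm_root = os.environ["LLM_ROOT"]
client_script = os.path.join(llm_root, "tensorrt_llm", "serve", "scripts",
                             "benchmark_serving.py")

# Placeholder inputs: a local ShareGPT JSON dump and a Llama 3.1 tokenizer dir.
dataset = "/path/to/ShareGPT_V3_unfiltered_cleaned_split.json"
tokenizer = "/path/to/Meta-Llama-3.1-8B"

# Same flags as benchmark_cmd above; check=True raises CalledProcessError on failure.
subprocess.run([
    "python3", client_script,
    "--dataset-name", "sharegpt",
    "--model", "llama",
    "--dataset-path", dataset,
    "--tokenizer", tokenizer,
], check=True)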
