Commit 4465c0f

add anthropic api test
Signed-off-by: liuli <[email protected]>
1 parent 8d6da34 commit 4465c0f

File tree

3 files changed: +279, −0 lines changed

tests/entrypoints/anthropic/__init__.py

Whitespace-only changes.
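(This empty `__init__.py` marks `tests/entrypoints/anthropic` as a package, so the package-relative import `from ...utils import RemoteAnthropicServer` in the test module below can resolve.)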
Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import anthropic
+import pytest
+import pytest_asyncio
+
+from ...utils import RemoteAnthropicServer
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+
+
+@pytest.fixture(scope="module")
+def server():  # noqa: F811
+    args = [
+        "--max-model-len", "8192", "--enforce-eager",
+        "--enable-auto-tool-choice", "--tool-call-parser", "hermes",
+        "--served-model-name", "claude-3-7-sonnet-latest"
+    ]
+
+    with RemoteAnthropicServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client() as async_client:
+        yield async_client
+
+
+@pytest.mark.asyncio
+async def test_simple_messages(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=8192,
+        messages=[
+            {
+                "role": "user",
+                "content": "how are you!"
+            }
+        ],
+    )
+    assert resp.stop_reason == "end_turn"
+    assert resp.role == "assistant"
+
+    print(f"Anthropic response: {resp.model_dump_json()}")
+
+
+@pytest.mark.asyncio
+async def test_system_message(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=8192,
+        system="you are a helpful assistant",
+        messages=[
+            {
+                "role": "user",
+                "content": "how are you!"
+            }
+        ],
+    )
+    assert resp.stop_reason == "end_turn"
+    assert resp.role == "assistant"
+
+    print(f"Anthropic response: {resp.model_dump_json()}")
+
+
+@pytest.mark.asyncio
+async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=8192,
+        messages=[
+            {
+                "role": "user",
+                "content": "how are you!"
+            }
+        ],
+        stream=True,
+    )
+
+    # with stream=True the SDK returns an event stream rather than a
+    # Message, so there is no stop_reason/role to assert on directly here
+    async for chunk in resp:
+        print(chunk.model_dump_json())
+
+
+@pytest.mark.asyncio
+async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=8192,
+        messages=[
+            {
+                "role": "user",
+                "content": "What's the weather like in New York today?"
+            }
+        ],
+        tools=[
+            {
+                "name": "get_current_weather",
+                "description": "Useful for querying the weather in a specified city.",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "City or region, for example: New York, London, Tokyo, etc."
+                        }
+                    },
+                    "required": ["location"]
+                }
+            }
+        ],
+        stream=False,
+    )
+    assert resp.stop_reason == "tool_use"
+    assert resp.role == "assistant"
+
+    print(f"Anthropic response: {resp.model_dump_json()}")
+
+
+@pytest.mark.asyncio
+async def test_anthropic_tool_call_streaming(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=8192,
+        messages=[
+            {
+                "role": "user",
+                "content": "What's the weather like in New York today?"
+            }
+        ],
+        tools=[
+            {
+                "name": "get_current_weather",
+                "description": "Useful for querying the weather in a specified city.",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "City or region, for example: New York, London, Tokyo, etc."
+                        }
+                    },
+                    "required": ["location"]
+                }
+            }
+        ],
+        stream=True,
+    )
+
+    async for chunk in resp:
+        print(chunk.model_dump_json())
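Note: with `stream=True`, the Messages API delivers `stop_reason` inside the final `message_delta` event of the stream rather than on the return value. A minimal sketch of asserting on it, assuming the same `client` fixture (the test name and structure here are illustrative, not part of the commit):

    @pytest.mark.asyncio
    async def test_streaming_stop_reason(client: anthropic.AsyncAnthropic):
        stream = await client.messages.create(
            model="claude-3-7-sonnet-latest",
            max_tokens=8192,
            messages=[{"role": "user", "content": "how are you!"}],
            stream=True,
        )
        stop_reason = None
        async for event in stream:
            # the final message_delta event carries the stop_reason
            if event.type == "message_delta":
                stop_reason = event.delta.stop_reason
        assert stop_reason == "end_turn"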

tests/utils.py

Lines changed: 125 additions & 0 deletions
@@ -16,6 +16,7 @@
 from pathlib import Path
 from typing import Any, Callable, Literal, Optional, Union
 
+import anthropic
 import cloudpickle
 import openai
 import pytest
@@ -194,6 +195,130 @@ def get_async_client(self, **kwargs):
                               **kwargs)
 
 
+class RemoteAnthropicServer:
+    DUMMY_API_KEY = "token-abc123"  # vLLM's Anthropic server does not need an API key
+
+    def __init__(self,
+                 model: str,
+                 vllm_serve_args: list[str],
+                 *,
+                 env_dict: Optional[dict[str, str]] = None,
+                 seed: Optional[int] = 0,
+                 auto_port: bool = True,
+                 max_wait_seconds: Optional[float] = None) -> None:
+        if auto_port:
+            if "-p" in vllm_serve_args or "--port" in vllm_serve_args:
+                raise ValueError("You have manually specified the port "
+                                 "when `auto_port=True`.")
+
+            # Don't mutate the input args
+            vllm_serve_args = vllm_serve_args + [
+                "--port", str(get_open_port())
+            ]
+        if seed is not None:
+            if "--seed" in vllm_serve_args:
+                raise ValueError("You have manually specified the seed "
+                                 f"when `seed={seed}`.")
+
+            vllm_serve_args = vllm_serve_args + ["--seed", str(seed)]
+
+        parser = FlexibleArgumentParser(
+            description="vLLM's remote Anthropic server.")
+        subparsers = parser.add_subparsers(required=False, dest="subparser")
+        parser = ServeSubcommand().subparser_init(subparsers)
+        args = parser.parse_args(["--model", model, *vllm_serve_args])
+        self.host = str(args.host or 'localhost')
+        self.port = int(args.port)
+
+        self.show_hidden_metrics = \
+            args.show_hidden_metrics_for_version is not None
+
+        # download the model before starting the server to avoid timeout
+        is_local = os.path.isdir(model)
+        if not is_local:
+            engine_args = AsyncEngineArgs.from_cli_args(args)
+            model_config = engine_args.create_model_config()
+            load_config = engine_args.create_load_config()
+
+            model_loader = get_model_loader(load_config)
+            model_loader.download_model(model_config)
+
+        env = os.environ.copy()
+        # the current process might initialize cuda,
+        # to be safe, we should use spawn method
+        env['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
+        if env_dict is not None:
+            env.update(env_dict)
+        self.proc = subprocess.Popen(
+            [
+                sys.executable, "-m",
+                "vllm.entrypoints.anthropic.api_server",
+                model, *vllm_serve_args
+            ],
+            env=env,
+            stdout=sys.stdout,
+            stderr=sys.stderr,
+        )
+        max_wait_seconds = max_wait_seconds or 240
+        self._wait_for_server(url=self.url_for("health"),
+                              timeout=max_wait_seconds)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.proc.terminate()
+        try:
+            self.proc.wait(8)
+        except subprocess.TimeoutExpired:
+            # force kill if needed
+            self.proc.kill()
+
+    def _wait_for_server(self, *, url: str, timeout: float):
+        # run health check
+        start = time.time()
+        while True:
+            try:
+                if requests.get(url).status_code == 200:
+                    break
+            except Exception:
+                # this exception can only be raised by requests.get,
+                # which means the server is not ready yet.
+                # the stack trace is not useful, so we suppress it
+                # by using `raise from None`.
+                result = self.proc.poll()
+                if result is not None and result != 0:
+                    raise RuntimeError(
+                        "Server exited unexpectedly.") from None
+
+                time.sleep(0.5)
+                if time.time() - start > timeout:
+                    raise RuntimeError(
+                        "Server failed to start in time.") from None
+
+    @property
+    def url_root(self) -> str:
+        return f"http://{self.host}:{self.port}"
+
+    def url_for(self, *parts: str) -> str:
+        return self.url_root + "/" + "/".join(parts)
+
+    def get_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
+        return anthropic.Anthropic(
+            base_url=self.url_for("v1"),
+            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
+        )
+
+    def get_async_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
+        return anthropic.AsyncAnthropic(
+            base_url=self.url_for("v1"),
+            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
+        )
+
+
 def _test_completion(
     client: openai.OpenAI,
     model: str,
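For context, a minimal sketch of driving the new helper with its synchronous client, outside the async fixtures (model and server args are illustrative, mirroring the test file above):

    from tests.utils import RemoteAnthropicServer

    args = ["--max-model-len", "8192", "--enforce-eager",
            "--served-model-name", "claude-3-7-sonnet-latest"]
    # the context manager starts the server subprocess, polls /health
    # until it is ready, and terminates the process on exit
    with RemoteAnthropicServer("Qwen/Qwen3-0.6B", args) as server:
        client = server.get_client()
        resp = client.messages.create(
            model="claude-3-7-sonnet-latest",
            max_tokens=1024,
            messages=[{"role": "user", "content": "ping"}],
        )
        assert resp.role == "assistant"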
