@@ -16,6 +16,7 @@
 from pathlib import Path
 from typing import Any, Callable, Literal, Optional, Union
 
+import anthropic
 import cloudpickle
 import openai
 import pytest
@@ -194,6 +195,130 @@ def get_async_client(self, **kwargs):
                                   **kwargs)
 
 
+class RemoteAnthropicServer:
+    DUMMY_API_KEY = "token-abc123"  # vLLM's Anthropic server does not need an API key
+
+    def __init__(self,
+                 model: str,
+                 vllm_serve_args: list[str],
+                 *,
+                 env_dict: Optional[dict[str, str]] = None,
+                 seed: Optional[int] = 0,
+                 auto_port: bool = True,
+                 max_wait_seconds: Optional[float] = None) -> None:
+        if auto_port:
+            if "-p" in vllm_serve_args or "--port" in vllm_serve_args:
+                raise ValueError("You have manually specified the port "
+                                 "when `auto_port=True`.")
+
+            # Don't mutate the input args
+            vllm_serve_args = vllm_serve_args + [
+                "--port", str(get_open_port())
+            ]
+        if seed is not None:
+            if "--seed" in vllm_serve_args:
+                raise ValueError("You have manually specified the seed "
+                                 f"when `seed={seed}`.")
+
+            vllm_serve_args = vllm_serve_args + ["--seed", str(seed)]
+
+        parser = FlexibleArgumentParser(
+            description="vLLM's remote Anthropic server.")
+        subparsers = parser.add_subparsers(required=False, dest="subparser")
+        parser = ServeSubcommand().subparser_init(subparsers)
+        args = parser.parse_args(["--model", model, *vllm_serve_args])
+        self.host = str(args.host or 'localhost')
+        self.port = int(args.port)
+
+        self.show_hidden_metrics = \
+            args.show_hidden_metrics_for_version is not None
+
+        # download the model before starting the server to avoid timeout
+        is_local = os.path.isdir(model)
+        if not is_local:
+            engine_args = AsyncEngineArgs.from_cli_args(args)
+            model_config = engine_args.create_model_config()
+            load_config = engine_args.create_load_config()
+
+            model_loader = get_model_loader(load_config)
+            model_loader.download_model(model_config)
+
+        env = os.environ.copy()
+        # the current process might initialize cuda,
+        # to be safe, we should use spawn method
+        env['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
+        if env_dict is not None:
+            env.update(env_dict)
+        # Launch the Anthropic-compatible API server in a subprocess.
+        # Note: "python" and "-m" must be separate argv entries.
+        self.proc = subprocess.Popen(
+            [
+                "python", "-m", "vllm.entrypoints.anthropic.api_server",
+                model, *vllm_serve_args
+            ],
+            env=env,
+            stdout=sys.stdout,
+            stderr=sys.stderr,
+        )
+        max_wait_seconds = max_wait_seconds or 240
+        self._wait_for_server(url=self.url_for("health"),
+                              timeout=max_wait_seconds)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.proc.terminate()
+        try:
+            self.proc.wait(8)
+        except subprocess.TimeoutExpired:
+            # force kill if needed
+            self.proc.kill()
+
+    def _wait_for_server(self, *, url: str, timeout: float):
+        # run health check
+        start = time.time()
+        while True:
+            try:
+                if requests.get(url).status_code == 200:
+                    break
+            except Exception:
+                # this exception can only be raised by requests.get,
+                # which means the server is not ready yet.
+                # the stack trace is not useful, so we suppress it
+                # by using `raise from None`.
+                result = self.proc.poll()
+                if result is not None and result != 0:
+                    raise RuntimeError("Server exited unexpectedly.") from None
+
+                time.sleep(0.5)
+                if time.time() - start > timeout:
+                    raise RuntimeError(
+                        "Server failed to start in time.") from None
+
+    @property
+    def url_root(self) -> str:
+        return f"http://{self.host}:{self.port}"
+
+    def url_for(self, *parts: str) -> str:
+        return self.url_root + "/" + "/".join(parts)
+
+    def get_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
+        return anthropic.Anthropic(
+            base_url=self.url_for("v1"),
+            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
+        )
+
+    def get_async_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
+        return anthropic.AsyncAnthropic(
+            base_url=self.url_for("v1"),
+            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
+        )
+
+
 def _test_completion(
     client: openai.OpenAI,
     model: str,
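
For context, a rough usage sketch of the new helper in a test. The model name, serve args, and prompt below are placeholders (not taken from this diff), and it assumes the server exposes the standard Anthropic Messages API:

```python
# Hypothetical test using the RemoteAnthropicServer helper defined above.
# The model and request contents are illustrative placeholders.
with RemoteAnthropicServer("meta-llama/Llama-3.2-1B-Instruct", []) as server:
    client = server.get_client()  # synchronous anthropic.Anthropic client
    message = client.messages.create(
        model="meta-llama/Llama-3.2-1B-Instruct",
        max_tokens=32,
        messages=[{"role": "user", "content": "Say hello."}],
    )
    assert message.content[0].text  # the server returned some text
```

The context-manager form ensures the subprocess is terminated (and force-killed if needed) even when the test body raises.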