Skip to content

Commit 311d875

Browse files
authored
Drop flaky test_healthcheck_response_time (#22539)
Signed-off-by: Russell Bryant <[email protected]>
1 parent e3edc0a commit 311d875

File tree

1 file changed

+0
-54
lines changed

1 file changed

+0
-54
lines changed

tests/entrypoints/openai/test_async_tokenization.py

Lines changed: 0 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,12 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

44
import asyncio
5-
import contextlib
65
import random
7-
import time
86
from typing import Callable
97

108
import openai
119
import pytest
1210
import pytest_asyncio
13-
import requests
1411

1512
from tests.utils import RemoteOpenAIServer
1613

@@ -87,54 +84,3 @@ async def get_status_code(**kwargs):
8784

8885
responses = await asyncio.gather(*[get_status_code(**b) for b in bodies])
8986
assert 500 not in responses
90-
91-
92-
@pytest.mark.asyncio
93-
@pytest.mark.parametrize(
94-
ids=["single completion", "multiple completions", "chat"],
95-
argnames=["create_func_gen", "content_body"],
96-
argvalues=[
97-
(lambda x: x.completions.create, {
98-
"prompt": " ".join(['A'] * 300_000)
99-
}),
100-
(lambda x: x.completions.create, {
101-
"prompt": [" ".join(['A'] * 300_000)] * 2
102-
}),
103-
(lambda x: x.chat.completions.create, {
104-
"messages": [{
105-
"role": "user",
106-
"content": " ".join(['A'] * 300_000)
107-
}]
108-
}),
109-
],
110-
)
111-
async def test_healthcheck_response_time(
112-
server: RemoteOpenAIServer,
113-
client: openai.AsyncOpenAI,
114-
create_func_gen: Callable,
115-
content_body: dict,
116-
):
117-
num_requests = 50
118-
119-
create_func = create_func_gen(client)
120-
body = {"model": MODEL_NAME, **content_body, "max_tokens": 10}
121-
122-
def get_response_time(url):
123-
start_time = time.monotonic()
124-
res = requests.get(url)
125-
end_time = time.monotonic()
126-
assert res.status_code == 200
127-
return end_time - start_time
128-
129-
no_load_response_time = get_response_time(server.url_for("health"))
130-
tasks = [
131-
asyncio.create_task(create_func(**body)) for _ in range(num_requests)
132-
]
133-
await asyncio.sleep(1) # give the tasks a chance to start running
134-
load_response_time = get_response_time(server.url_for("health"))
135-
136-
with contextlib.suppress(openai.APIStatusError):
137-
await asyncio.gather(*tasks)
138-
139-
assert load_response_time < 100 * no_load_response_time
140-
assert load_response_time < 0.1

0 commit comments

Comments (0)