# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import asyncio
import contextlib
import random
import time
from typing import Callable

import openai
import pytest
import pytest_asyncio
import requests

from tests.utils import RemoteOpenAIServer
|
@@ -87,54 +84,3 @@ async def get_status_code(**kwargs):
|
87 | 84 |
|
88 | 85 | responses = await asyncio.gather(*[get_status_code(**b) for b in bodies])
|
89 | 86 | assert 500 not in responses
|
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ids=["single completion", "multiple completions", "chat"],
    argnames=["create_func_gen", "content_body"],
    argvalues=[
        (lambda x: x.completions.create, {
            "prompt": " ".join(['A'] * 300_000)
        }),
        (lambda x: x.completions.create, {
            "prompt": [" ".join(['A'] * 300_000)] * 2
        }),
        (lambda x: x.chat.completions.create, {
            "messages": [{
                "role": "user",
                "content": " ".join(['A'] * 300_000)
            }]
        }),
    ],
)
async def test_healthcheck_response_time(
    server: RemoteOpenAIServer,
    client: openai.AsyncOpenAI,
    create_func_gen: Callable,
    content_body: dict,
):
    """Verify /health stays responsive while the server is under load.

    Measures the health-endpoint latency once with the server idle and once
    while ``num_requests`` oversized completion/chat requests are in flight;
    the loaded latency must stay within both an absolute bound (100 ms) and
    a relative bound (100x the idle latency).
    """
    num_requests = 50

    create_func = create_func_gen(client)
    body = {"model": MODEL_NAME, **content_body, "max_tokens": 10}

    def get_response_time(url: str) -> float:
        # Blocking GET on purpose: measures wall-clock latency of a plain
        # HTTP client, independent of this test's event loop.
        start_time = time.monotonic()
        # Bounded timeout so a wedged server fails the test with an
        # exception instead of hanging the suite forever.
        res = requests.get(url, timeout=30)
        end_time = time.monotonic()
        assert res.status_code == 200
        return end_time - start_time

    no_load_response_time = get_response_time(server.url_for("health"))
    tasks = [
        asyncio.create_task(create_func(**body)) for _ in range(num_requests)
    ]
    try:
        await asyncio.sleep(1)  # give the tasks a chance to start running
        load_response_time = get_response_time(server.url_for("health"))
    finally:
        # Always drain the in-flight requests, even if the health-check
        # assertion above fails — otherwise the pending tasks leak and emit
        # "task was never awaited" warnings. The oversized prompts are
        # expected to be rejected with an API error, hence the suppress.
        with contextlib.suppress(openai.APIStatusError):
            await asyncio.gather(*tasks)

    assert load_response_time < 100 * no_load_response_time
    assert load_response_time < 0.1
0 commit comments