Skip to content

Commit 4f75a31

Browse files
authored
[https://nvbugs/5540979][fix] Potential fix for 5540979 (#9716)
Signed-off-by: Rashid Kaleem <[email protected]>
1 parent 3230fbe commit 4f75a31

File tree

1 file changed

+8 -0 lines changed

1 file changed

+8 -0 lines changed

tensorrt_llm/serve/openai_client.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,7 @@ async def _post_with_retry(
159 159   is_stream = request.stream
160 160   for attempt in range(self._max_retries + 1):
161 161   try:
    162 + lines_yielded = 0
162 163   start_time = get_steady_clock_now_in_seconds()
163 164   async with self._session.post(url, json=json_data) as http_response:
164 165   content_type = http_response.headers.get("Content-Type", "")
@@ -172,6 +173,7 @@ async def _post_with_retry(
172 173   async for line in self._response_generator(
173 174   request, http_response, start_time, server, hooks
174 175   ):
    176 + lines_yielded += 1
175 177   yield line
176 178   # don't finish the request here since the response generator is not done yet
177 179   else:
@@ -183,6 +185,12 @@ async def _post_with_retry(
183 185   await self._finish_request(request)
184 186   break # break and skip retries if the whole response is processed without exception
185 187   except (aiohttp.ClientError, OSError) as e:
    188 + if lines_yielded > 0:
    189 + logger.error(
    190 + f"Client error to {url}: {e} - cannot retry since {lines_yielded} lines were yielded",
    191 + traceback.format_exc(),
    192 + )
    193 + raise
186 194   if attempt == self._max_retries:
187 195   logger.error(
188 196   f"Client error to {url}: {e} - last retry {attempt} of {self._max_retries}"

0 commit comments

Comments
 (0)