1 parent e2ff3a7 commit c8d01be
src/huggingface_hub/inference/_common.py
@@ -347,9 +347,12 @@ def _format_chat_completion_stream_output(


 async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
-    async for byte_payload in response.content:
-        yield byte_payload.strip()
-    await client.close()
+    try:
+        async for byte_payload in response.content:
+            yield byte_payload.strip()
+    finally:
+        # Always close the underlying HTTP session to avoid resource leaks
+        await client.close()


 # "TGI servers" are servers running with the `text-generation-inference` backend.