Skip to content

Commit 3f80a2a

Browse files
committed
perf: use deque for async response chunk iteration
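AsyncResponse.__anext__() replays cached chunks via list.pop(0) when the response is already done. Each pop(0) shifts every remaining element, so it is O(n) per removal and O(n^2) for a full replay of n chunks. Switch to collections.deque, whose popleft() is O(1), so a full replay is O(n) overall (the one-time copy made in __aiter__() is still O(n)).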
1 parent 6b84a0d commit 3f80a2a

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

llm/models.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import base64
3+
from collections import deque
34
from condense_json import condense_json
45
from dataclasses import dataclass, field
56
import datetime
@@ -1386,13 +1387,13 @@ def __aiter__(self):
13861387
self._start = time.monotonic()
13871388
self._start_utcnow = datetime.datetime.now(datetime.timezone.utc)
13881389
if self._done:
1389-
self._iter_chunks = list(self._chunks) # Make a copy for iteration
1390+
self._iter_chunks = deque(self._chunks) # Make a copy for iteration
13901391
return self
13911392

13921393
async def __anext__(self) -> str:
13931394
if self._done:
13941395
if hasattr(self, "_iter_chunks") and self._iter_chunks:
1395-
return self._iter_chunks.pop(0)
1396+
return self._iter_chunks.popleft()
13961397
raise StopAsyncIteration
13971398

13981399
if not hasattr(self, "_generator"):

0 commit comments

Comments
 (0)