1
1
import asyncio
2
2
import time
3
3
from functools import partial
4
- from typing import Any , Dict , Iterable , List , Optional , Set , Type , Union
4
+ from typing import Any , Dict , Iterable , List , Optional , Type , Union
5
5
6
6
from vllm .config import ModelConfig
7
7
from vllm .engine .arg_utils import AsyncEngineArgs
@@ -152,7 +152,7 @@ def __init__(self,
152
152
153
153
# Request id -> stream.
154
154
self .request_streams : Dict [str , AsyncStream ] = {}
155
- self .finished_requests : Set [str ] = set ()
155
+ self .finished_requests : asyncio . Queue [str ] = asyncio . Queue ()
156
156
self .background_loop = None
157
157
if start_engine_loop :
158
158
self .start_background_loop ()
@@ -194,12 +194,14 @@ async def engine_step(self):
194
194
if self .log_requests :
195
195
logger .info (f"Finished request { request_id } ." )
196
196
self .request_streams [request_id ].finish ()
197
- self .finished_requests .add (request_id )
197
+ self .finished_requests .put_nowait (request_id )
198
198
199
- await self ._engine_abort (self .finished_requests )
200
- for request_id in self .finished_requests :
199
+ finished_request = set ()
200
+ while not self .finished_requests .empty ():
201
+ finished_request .add (self .finished_requests .get_nowait ())
202
+ await self ._engine_abort (finished_request )
203
+ for request_id in finished_request :
201
204
del self .request_streams [request_id ]
202
- self .finished_requests .clear ()
203
205
204
206
async def _engine_abort (self , request_ids : Iterable [str ]):
205
207
if self .engine_use_ray :
@@ -226,6 +228,8 @@ async def add_request(
226
228
f"sampling params: { sampling_params } , "
227
229
f"prompt token ids: { prompt_token_ids } ." )
228
230
231
+ if request_id in self .request_streams :
232
+ raise KeyError (f"Request { request_id } already exists." )
229
233
stream = AsyncStream (request_id )
230
234
self .request_streams [request_id ] = stream
231
235
@@ -316,7 +320,7 @@ def _abort(self, request_id: str) -> None:
316
320
logger .info (f"Aborted request { request_id } ." )
317
321
318
322
self .request_streams [request_id ].finish ()
319
- self .finished_requests .add (request_id )
323
+ self .finished_requests .put_nowait (request_id )
320
324
321
325
async def get_model_config (self ) -> ModelConfig :
322
326
"""Get the model configuration of the vLLM engine."""
0 commit comments