
Commit 1696725
Initialize AsyncLLMEngine bg loop correctly (#943)
1 parent 002800f

3 files changed: +25 -7 lines

vllm/engine/async_llm_engine.py (11 additions, 5 deletions)

@@ -155,11 +155,15 @@ def __init__(self,
         self.finished_requests: Set[str] = set()
         self.background_loop = None
         if start_engine_loop:
-            self._start_background_loop()
+            self.start_background_loop()
 
-    def _start_background_loop(self) -> None:
+    @property
+    def is_running(self) -> bool:
+        return self.background_loop is not None
+
+    def start_background_loop(self) -> None:
         """Start the background loop."""
-        if self.background_loop is not None:
+        if self.is_running:
             raise RuntimeError("Background loop is already running.")
         self.background_loop = asyncio.get_event_loop().create_task(
             self.run_engine_loop())

@@ -323,7 +327,8 @@ async def get_model_config(self) -> ModelConfig:
 
     @classmethod
     def from_engine_args(cls,
-                         engine_args: AsyncEngineArgs) -> "AsyncLLMEngine":
+                         engine_args: AsyncEngineArgs,
+                         start_engine_loop: bool = False) -> "AsyncLLMEngine":
         """Creates an async LLM engine from the engine arguments."""
         # Create the engine configs.
         engine_configs = engine_args.create_engine_configs()

@@ -338,5 +343,6 @@ def from_engine_args(cls,
                      distributed_init_method,
                      placement_group,
                      log_requests=not engine_args.disable_log_requests,
-                     log_stats=not engine_args.disable_log_stats)
+                     log_stats=not engine_args.disable_log_stats,
+                     start_engine_loop=start_engine_loop)
         return engine
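The point of the change: creating the background task in __init__ calls asyncio.get_event_loop() before any server loop is running, so the task can end up bound to a loop that is never actually run. Deferring via the now-public start_background_loop() lets callers start the task from inside a coroutine, where the running loop is guaranteed to be the right one. A minimal sketch of the new surface (the model name here is illustrative, not from this commit):

import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine

# Construct without starting the background loop (the new default).
engine_args = AsyncEngineArgs(model="facebook/opt-125m")  # illustrative model
engine = AsyncLLMEngine.from_engine_args(engine_args, start_engine_loop=False)

async def main() -> None:
    # Inside a coroutine the current event loop is running, so the
    # task created by start_background_loop() lands on the right loop.
    if not engine.is_running:
        engine.start_background_loop()

asyncio.run(main())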

vllm/entrypoints/api_server.py (6 additions, 1 deletion)

@@ -30,6 +30,10 @@ async def generate(request: Request) -> Response:
     stream = request_dict.pop("stream", False)
     sampling_params = SamplingParams(**request_dict)
     request_id = random_uuid()
+
+    if not engine.is_running:
+        engine.start_background_loop()
+
     results_generator = engine.generate(prompt, sampling_params, request_id)
 
     # Streaming case

@@ -75,7 +79,8 @@ async def abort_request() -> None:
     args = parser.parse_args()
 
     engine_args = AsyncEngineArgs.from_cli_args(args)
-    engine = AsyncLLMEngine.from_engine_args(engine_args)
+    engine = AsyncLLMEngine.from_engine_args(engine_args,
+                                             start_engine_loop=False)
 
     uvicorn.run(app,
                 host=args.host,
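uvicorn.run() creates and runs its own event loop, so any task created before that call lives on a different loop and never executes. A stripped-down sketch of that failure mode in plain asyncio (no vLLM involved), which the is_running guard above sidesteps by starting the engine task from inside the request handler:

import asyncio

async def tick() -> None:
    while True:
        await asyncio.sleep(1.0)

# What eager construction used to do, in miniature: bind a task to a
# loop that exists before the server starts...
outer = asyncio.new_event_loop()
outer.create_task(tick())

# ...then run on a fresh loop, as uvicorn.run()/asyncio.run() do. The
# tick() task above is never scheduled on this loop, and Python
# typically warns "Task was destroyed but it is pending!" at shutdown.
asyncio.run(asyncio.sleep(0.1))
outer.close()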

vllm/entrypoints/openai/api_server.py (8 additions, 1 deletion)

@@ -191,6 +191,9 @@ async def create_chat_completion(request: ChatCompletionRequest,
     """
     logger.info(f"Received chat completion request: {request}")
 
+    if not engine.is_running:
+        engine.start_background_loop()
+
     error_check_ret = await check_model(request)
     if error_check_ret is not None:
         return error_check_ret

@@ -363,6 +366,9 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
     """
     logger.info(f"Received completion request: {request}")
 
+    if not engine.is_running:
+        engine.start_background_loop()
+
     error_check_ret = await check_model(request)
     if error_check_ret is not None:
         return error_check_ret

@@ -620,7 +626,8 @@ async def fake_stream_generator() -> AsyncGenerator[str, None]:
     served_model = args.model
 
     engine_args = AsyncEngineArgs.from_cli_args(args)
-    engine = AsyncLLMEngine.from_engine_args(engine_args)
+    engine = AsyncLLMEngine.from_engine_args(engine_args,
+                                             start_engine_loop=False)
     engine_model_config = asyncio.run(engine.get_model_config())
     max_model_len = engine_model_config.get_max_model_len()
 
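Note the line right after construction: asyncio.run(engine.get_model_config()) spins up a temporary event loop and tears it down before uvicorn ever starts, which is exactly why eagerly creating the background task at construction time was unsafe in this entrypoint. With the lazy start, the first client request is what brings the loop up. A hypothetical first request (the /v1/completions route, port, and model name are assumptions, not shown in this diff):

import requests  # third-party HTTP client, used here for illustration

# Hypothetical first call to the OpenAI-compatible server; handling it
# runs the "if not engine.is_running: engine.start_background_loop()"
# guard on uvicorn's (running) event loop.
resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={"model": "facebook/opt-125m", "prompt": "Hello", "max_tokens": 8},
)
print(resp.json())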
