@@ -475,7 +475,8 @@ async def add_request_async(
475
475
* ,
476
476
inputs: Optional[PromptType] = None,  # DEPRECATED
477
477
) -> None:
478
- """Async version of {meth}`add_request`."""
478
+ """Async version of
479
+ [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
479
480
if inputs is not None :
480
481
prompt = inputs
481
482
assert prompt is not None and params is not None
@@ -582,20 +583,21 @@ async def build_guided_decoding_logits_processor_async(
582
583
583
584
584
585
class AsyncLLMEngine(EngineClient):
585
- """An asynchronous wrapper for {class} `LLMEngine`.
586
+ """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine].
586
587
587
- This class is used to wrap the {class}`LLMEngine` class to make it
588
- asynchronous. It uses asyncio to create a background loop that keeps
589
- processing incoming requests. The {class}`LLMEngine` is kicked by the
590
- generate method when there are requests in the waiting queue. The generate
591
- method yields the outputs from the {class}`LLMEngine` to the caller.
588
+ This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to
589
+ make it asynchronous. It uses asyncio to create a background loop that keeps
590
+ processing incoming requests. The [`LLMEngine`][vllm.LLMEngine] is kicked
591
+ by the generate method when there are requests in the waiting queue. The
592
+ generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine]
593
+ to the caller.
592
594
593
595
Args:
594
596
log_requests: Whether to log the requests.
595
597
start_engine_loop: If True, the background task to run the engine
596
598
will be automatically started in the generate call.
597
- *args: Arguments for {class} `LLMEngine`.
598
- **kwargs: Arguments for {class} `LLMEngine`.
599
+ *args: Arguments for [`LLMEngine`][vllm.LLMEngine].
600
+ **kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine].
599
601
"""
600
602
601
603
_engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine
@@ -985,8 +987,9 @@ async def generate(
985
987
from the LLMEngine to the caller.
986
988
987
989
Args:
988
- prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
989
- for more details about the format of each input.
990
+ prompt: The prompt to the LLM. See
991
+ [`PromptType`][vllm.inputs.PromptType] for more details about
992
+ the format of each input.
990
993
sampling_params: The sampling parameters of the request.
991
994
request_id: The unique id of the request.
992
995
lora_request: LoRA request to use for generation, if any.
@@ -1003,7 +1006,7 @@ async def generate(
1003
1006
Details:
1004
1007
- If the engine is not running, start the background loop,
1005
1008
which iteratively invokes
1006
- {meth}`~ vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
1009
+ [`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step]
1007
1010
to process the waiting requests.
1008
1011
- Add the request to the engine's `RequestTracker`.
1009
1012
On the next background loop, this request will be sent to
@@ -1075,8 +1078,9 @@ async def encode(
1075
1078
from the LLMEngine to the caller.
1076
1079
1077
1080
Args:
1078
- prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
1079
- for more details about the format of each input.
1081
+ prompt: The prompt to the LLM. See
1082
+ [`PromptType`][vllm.inputs.PromptType] for more details about
1083
+ the format of each input.
1080
1084
pooling_params: The pooling parameters of the request.
1081
1085
request_id: The unique id of the request.
1082
1086
lora_request: LoRA request to use for generation, if any.
@@ -1089,15 +1093,15 @@ async def encode(
1089
1093
for the request.
1090
1094
1091
1095
Details:
1092
- - If the engine is not running, start the background loop,
1093
- which iteratively invokes
1094
- {meth}`~ vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
1095
- to process the waiting requests.
1096
- - Add the request to the engine's `RequestTracker`.
1097
- On the next background loop, this request will be sent to
1098
- the underlying engine.
1099
- Also, a corresponding `AsyncStream` will be created.
1100
- - Wait for the request outputs from `AsyncStream` and yield them.
1096
+ - If the engine is not running, start the background loop,
1097
+ which iteratively invokes
1098
+ [`vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`][]
1099
+ to process the waiting requests.
1100
+ - Add the request to the engine's `RequestTracker`.
1101
+ On the next background loop, this request will be sent to
1102
+ the underlying engine.
1103
+ Also, a corresponding `AsyncStream` will be created.
1104
+ - Wait for the request outputs from `AsyncStream` and yield them.
1101
1105
1102
1106
Example:
1103
1107
```
0 commit comments