@@ -6,12 +6,10 @@
 import time
 import weakref
 from functools import partial
-from typing import (Any, AsyncGenerator, Callable, Coroutine, Dict, Iterable,
-                    List, Mapping, Optional, Set, Tuple, Type, Union, overload)
+from typing import (Any, AsyncGenerator, Callable, Dict, Iterable, List,
+                    Mapping, Optional, Set, Tuple, Type, Union)
 from weakref import ReferenceType
 
-from typing_extensions import deprecated
-
 import vllm.envs as envs
 from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
                          ParallelConfig, SchedulerConfig, VllmConfig)
@@ -36,7 +34,7 @@
 from vllm.sequence import ExecuteModelRequest
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, deprecate_kwargs, weak_bind
+from vllm.utils import Device, weak_bind
 
 logger = init_logger(__name__)
 ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -429,24 +427,6 @@ async def get_tokenizer_async(self,
         return await (
             self.get_tokenizer_group().get_lora_tokenizer_async(lora_request))
 
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    async def add_request_async(
-        self,
-        request_id: str,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-    ) -> None:
-        ...
-
-    @overload
     async def add_request_async(
         self,
         request_id: str,
@@ -459,32 +439,10 @@ async def add_request_async(
         priority: int = 0,
         data_parallel_rank: Optional[int] = None,
     ) -> None:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    async def add_request_async(
-        self,
-        request_id: str,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
-    ) -> None:
-        """Async version of
-        [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
-        if inputs is not None:
-            prompt = inputs
-        assert prompt is not None and params is not None
-
+        """
+        Async version of
+        [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request].
+        """
         if lora_request is not None and not self.lora_config:
             raise ValueError(f"Got lora_request {lora_request} but LoRA is "
                              "not enabled!")
@@ -521,8 +479,7 @@ async def add_request_async(
             params = await build_guided_decoding_logits_processor_async(
                 sampling_params=params,
                 tokenizer=await self.get_tokenizer_async(lora_request),
-                default_guided_backend=self.decoding_config.
-                guided_decoding_backend,
+                default_guided_backend=self.decoding_config.backend,
                 reasoning_backend=self.decoding_config.reasoning_backend,
                 model_config=self.model_config)
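With the overload stubs and the `deprecate_kwargs` shim gone, `add_request_async` is left with a single signature in which `prompt` and `params` are required, and the default guided-decoding backend is read from `DecodingConfig.backend` rather than the former `guided_decoding_backend` attribute. A minimal calling sketch against the post-change signature, not part of the diff: `engine` is assumed to be an already-initialized `AsyncLLMEngine` (setup omitted).

```python
from vllm import SamplingParams


async def submit(engine) -> None:
    # Assumption: `engine` is an initialized AsyncLLMEngine. The prompt is
    # passed as `prompt=`; the removed `inputs=` keyword would now raise a
    # TypeError since no parameter of that name remains.
    await engine.add_request_async(
        request_id="req-0",
        prompt="Hello, world!",
        params=SamplingParams(max_tokens=16),
    )
```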
@@ -894,28 +851,7 @@ async def run_engine_loop(engine_ref: ReferenceType):
                 raise
             await asyncio.sleep(0)
 
-    # This method does not need to be async, but kept that way
-    # for backwards compatibility.
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    def add_request(
-        self,
-        request_id: str,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-    ) -> Coroutine[None, None, AsyncGenerator[Union[
-            RequestOutput, PoolingRequestOutput], None]]:
-        ...
-
-    @overload
-    def add_request(
+    async def add_request(
         self,
         request_id: str,
         prompt: PromptType,
@@ -926,32 +862,7 @@ def add_request(
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         priority: int = 0,
         data_parallel_rank: Optional[int] = None,
-    ) -> Coroutine[None, None, AsyncGenerator[Union[
-            RequestOutput, PoolingRequestOutput], None]]:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    async def add_request(
-        self,
-        request_id: str,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
     ) -> AsyncGenerator[Union[RequestOutput, PoolingRequestOutput], None]:
-        if inputs is not None:
-            prompt = inputs
-        assert prompt is not None and params is not None
-
         if not self.is_running:
             if self.start_engine_loop:
                 self.start_background_loop()
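Because `add_request` is now a genuine coroutine rather than an overloaded method annotated as returning `Coroutine[...]`, callers await it to obtain the output stream. A hedged consumption sketch, again assuming an initialized `AsyncLLMEngine` named `engine` (setup omitted):

```python
from vllm import SamplingParams


async def consume(engine) -> None:
    # Awaiting add_request returns an async generator that yields
    # incremental RequestOutput objects until the request finishes.
    stream = await engine.add_request(
        request_id="req-1",
        prompt="Write a haiku about queues.",
        params=SamplingParams(max_tokens=32),
    )
    async for request_output in stream:
        print(request_output.outputs[0].text)
```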