Commit e31ae3d

[Deprecation] Remove inputs arg fallback in Engine classes (#18799)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent: 2ffb9b6

File tree

4 files changed: +21 -268 lines changed

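For callers, the practical effect is that `add_request` no longer accepts the deprecated `inputs=` keyword; the prompt must be passed via the `prompt` parameter, as the removed deprecation message already advised. A minimal caller-side sketch against the synchronous `LLMEngine` follows; the helper name and the `SamplingParams` settings are illustrative assumptions, not part of this commit:

```python
from vllm import SamplingParams
from vllm.engine.llm_engine import LLMEngine


def submit_prompt(engine: LLMEngine, request_id: str, text: str) -> None:
    """Queue a generation request using the 'prompt' keyword.

    Previously the engine silently remapped the deprecated 'inputs' kwarg
    to 'prompt'; after this commit only 'prompt' is accepted.
    """
    params = SamplingParams(max_tokens=16)  # illustrative settings (assumption)
    engine.add_request(
        request_id=request_id,
        prompt=text,  # 'prompt' replaces the removed 'inputs' fallback
        params=params,
    )
```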

vllm/engine/async_llm_engine.py

Lines changed: 9 additions & 98 deletions
```diff
@@ -6,12 +6,10 @@
 import time
 import weakref
 from functools import partial
-from typing import (Any, AsyncGenerator, Callable, Coroutine, Dict, Iterable,
-                    List, Mapping, Optional, Set, Tuple, Type, Union, overload)
+from typing import (Any, AsyncGenerator, Callable, Dict, Iterable, List,
+                    Mapping, Optional, Set, Tuple, Type, Union)
 from weakref import ReferenceType
 
-from typing_extensions import deprecated
-
 import vllm.envs as envs
 from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
                          ParallelConfig, SchedulerConfig, VllmConfig)
@@ -36,7 +34,7 @@
 from vllm.sequence import ExecuteModelRequest
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, deprecate_kwargs, weak_bind
+from vllm.utils import Device, weak_bind
 
 logger = init_logger(__name__)
 ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -429,24 +427,6 @@ async def get_tokenizer_async(self,
         return await (
             self.get_tokenizer_group().get_lora_tokenizer_async(lora_request))
 
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    async def add_request_async(
-        self,
-        request_id: str,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-    ) -> None:
-        ...
-
-    @overload
     async def add_request_async(
         self,
         request_id: str,
@@ -459,32 +439,10 @@ async def add_request_async(
         priority: int = 0,
         data_parallel_rank: Optional[int] = None,
     ) -> None:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    async def add_request_async(
-        self,
-        request_id: str,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
-    ) -> None:
-        """Async version of
-        [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
-        if inputs is not None:
-            prompt = inputs
-        assert prompt is not None and params is not None
-
+        """
+        Async version of
+        [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request].
+        """
         if lora_request is not None and not self.lora_config:
             raise ValueError(f"Got lora_request {lora_request} but LoRA is "
                              "not enabled!")
@@ -521,8 +479,7 @@ async def add_request_async(
             params = await build_guided_decoding_logits_processor_async(
                 sampling_params=params,
                 tokenizer=await self.get_tokenizer_async(lora_request),
-                default_guided_backend=self.decoding_config.
-                guided_decoding_backend,
+                default_guided_backend=self.decoding_config.backend,
                 reasoning_backend=self.decoding_config.reasoning_backend,
                 model_config=self.model_config)
 
@@ -894,28 +851,7 @@ async def run_engine_loop(engine_ref: ReferenceType):
                 raise
             await asyncio.sleep(0)
 
-    # This method does not need to be async, but kept that way
-    # for backwards compatibility.
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    def add_request(
-        self,
-        request_id: str,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-    ) -> Coroutine[None, None, AsyncGenerator[Union[
-            RequestOutput, PoolingRequestOutput], None]]:
-        ...
-
-    @overload
-    def add_request(
+    async def add_request(
         self,
         request_id: str,
         prompt: PromptType,
@@ -926,32 +862,7 @@ def add_request(
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         priority: int = 0,
         data_parallel_rank: Optional[int] = None,
-    ) -> Coroutine[None, None, AsyncGenerator[Union[
-            RequestOutput, PoolingRequestOutput], None]]:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    async def add_request(
-        self,
-        request_id: str,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        data_parallel_rank: Optional[int] = None,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
     ) -> AsyncGenerator[Union[RequestOutput, PoolingRequestOutput], None]:
-        if inputs is not None:
-            prompt = inputs
-        assert prompt is not None and params is not None
-
         if not self.is_running:
             if self.start_engine_loop:
                 self.start_background_loop()
```
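
On the async engine, `add_request` is now a single `async def` that returns the output stream directly, with the same calling convention as before: await the call to obtain the generator. A minimal sketch, assuming an already-constructed `AsyncLLMEngine`; the helper name and sampling settings are illustrative assumptions:

```python
from vllm import SamplingParams
from vllm.engine.async_llm_engine import AsyncLLMEngine


async def stream_completion(engine: AsyncLLMEngine, request_id: str,
                            text: str) -> None:
    """Submit a prompt and print the streamed outputs as they arrive."""
    params = SamplingParams(max_tokens=16)  # illustrative settings (assumption)
    # Only 'prompt' is accepted here; the 'inputs' fallback has been removed.
    stream = await engine.add_request(request_id, prompt=text, params=params)
    async for request_output in stream:
        print(request_output.outputs[0].text)
```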

vllm/engine/llm_engine.py

Lines changed: 3 additions & 45 deletions
```diff
@@ -11,10 +11,10 @@
 from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Deque, Dict,
                     Iterable, List, Literal, Mapping, NamedTuple, Optional)
 from typing import Sequence as GenericSequence
-from typing import Set, Type, Union, cast, overload
+from typing import Set, Type, Union, cast
 
 import torch
-from typing_extensions import TypeVar, deprecated
+from typing_extensions import TypeVar
 
 import vllm.envs as envs
 from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
@@ -58,8 +58,7 @@
     TokenizerGroup, init_tokenizer_from_configs)
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
-from vllm.utils import (Counter, Device, deprecate_kwargs,
-                        resolve_obj_by_qualname, weak_bind)
+from vllm.utils import Counter, Device, resolve_obj_by_qualname, weak_bind
 from vllm.version import __version__ as VLLM_VERSION
 from vllm.worker.model_runner_base import InputProcessingError
 
@@ -629,7 +628,6 @@ def _add_processed_request(
     def stop_remote_worker_execution_loop(self) -> None:
         self.model_executor.stop_remote_worker_execution_loop()
 
-    @overload
     def add_request(
         self,
         request_id: str,
@@ -641,42 +639,6 @@ def add_request(
         trace_headers: Optional[Mapping[str, str]] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         priority: int = 0,
-    ) -> None:
-        ...
-
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    def add_request(
-        self,
-        request_id: str,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-    ) -> None:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    def add_request(
-        self,
-        request_id: str,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        tokenization_kwargs: Optional[dict[str, Any]] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
     ) -> None:
         """Add a request to the engine's request pool.
 
@@ -725,10 +687,6 @@ def add_request(
         >>> # continue the request processing
         >>> ...
         """
-        if inputs is not None:
-            prompt = inputs
-        assert prompt is not None and params is not None
-
         if lora_request is not None and not self.lora_config:
             raise ValueError(f"Got lora_request {lora_request} but LoRA is "
                              "not enabled!")
```

vllm/engine/multiprocessing/__init__.py

Lines changed: 2 additions & 43 deletions
```diff
@@ -4,17 +4,15 @@
 import uuid
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import List, Mapping, Optional, Union, overload
-
-from typing_extensions import deprecated
+from typing import List, Mapping, Optional, Union
 
 from vllm import PoolingParams
 from vllm.inputs import PromptType
 from vllm.lora.request import LoRARequest
 from vllm.outputs import RequestOutput
 from vllm.prompt_adapter.request import PromptAdapterRequest
 from vllm.sampling_params import SamplingParams
-from vllm.utils import Device, deprecate_kwargs
+from vllm.utils import Device
 
 VLLM_RPC_SUCCESS_STR = "SUCCESS"
 
@@ -38,7 +36,6 @@ class RPCProcessRequest:
     prompt_adapter_request: Optional[PromptAdapterRequest] = None
     priority: int = 0
 
-    @overload
    def __init__(
        self,
        prompt: PromptType,
@@ -49,44 +46,6 @@ def __init__(
        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
        priority: int = 0,
    ) -> None:
-        ...
-
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    def __init__(
-        self,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        request_id: str,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-    ) -> None:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    def __init__(
-        self,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        request_id: Optional[str] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
-    ) -> None:
-        if inputs is not None:
-            prompt = inputs
-        assert (prompt is not None and params is not None
-                and request_id is not None)
-
         super().__init__()
 
         self.prompt = prompt
```
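
The same rule applies when constructing `RPCProcessRequest` for the multiprocessing client: the input goes in the `prompt` parameter, and the keyword-only `inputs` path no longer exists. A small sketch with illustrative values; the sampling settings are an assumption:

```python
from vllm import SamplingParams
from vllm.engine.multiprocessing import RPCProcessRequest

# 'prompt' is the only supported way to pass the input after this commit.
request = RPCProcessRequest(
    prompt="Hello, world!",
    params=SamplingParams(max_tokens=16),  # illustrative settings (assumption)
    request_id="req-0",
)
```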
