
Commit c583038

[V0 Deprecation] Remove args for multi-step scheduling (#22779)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent d31f97c commit c583038

File tree

2 files changed: 1 addition, 27 deletions


tests/utils_/test_utils.py

Lines changed: 0 additions & 1 deletion
@@ -161,7 +161,6 @@ def parser_with_config():
     parser.add_argument('--port', type=int)
     parser.add_argument('--tensor-parallel-size', type=int)
     parser.add_argument('--trust-remote-code', action='store_true')
-    parser.add_argument('--multi-step-stream-outputs', action=StoreBoolean)
     return parser


vllm/config/scheduler.py

Lines changed: 1 addition & 26 deletions
@@ -115,12 +115,6 @@ class SchedulerConfig:
     (e.g., beam search), recomputation is not currently supported. In
     such a case, we use swapping instead."""

-    num_scheduler_steps: int = 1
-    """Maximum number of forward steps per scheduler call."""
-
-    multi_step_stream_outputs: bool = True
-    """If False, then multi-step will stream outputs at the end of all steps"""
-
     send_delta_data: bool = False
     """Private API. If used, scheduler sends delta data to
     workers instead of an entire data. It should be enabled only

@@ -193,16 +187,7 @@ def __post_init__(self) -> None:

         if self.max_num_batched_tokens is None:
             if self.enable_chunked_prefill:
-                if self.num_scheduler_steps > 1:
-                    # Multi-step Chunked-Prefill doesn't allow prompt-chunking
-                    # for now. Have max_num_batched_tokens set to max_model_len
-                    # so we don't reject sequences on account of a short
-                    # max_num_batched_tokens.
-                    self.max_num_batched_tokens = max(
-                        self.max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS)
-                else:
-                    self.max_num_batched_tokens = (
-                        DEFAULT_MAX_NUM_BATCHED_TOKENS)
+                self.max_num_batched_tokens = DEFAULT_MAX_NUM_BATCHED_TOKENS
             else:
                 # If max_model_len is too short, use
                 # DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value

@@ -293,12 +278,6 @@ def _verify_args(self) -> Self:
                 f"({self.num_lookahead_slots}) must be greater than or "
                 "equal to 0.")

-        if self.num_scheduler_steps < 1:
-            raise ValueError(
-                "num_scheduler_steps "
-                f"({self.num_scheduler_steps}) must be greater than or "
-                "equal to 1.")
-
         if self.max_num_partial_prefills < 1:
             raise ValueError(
                 f"max_num_partial_prefills ({self.max_num_partial_prefills}) "

@@ -323,7 +302,3 @@ def _verify_args(self) -> Self:
                 f"max_num_partial_prefills ({self.max_num_partial_prefills}).")

         return self
-
-    @property
-    def is_multi_step(self) -> bool:
-        return self.num_scheduler_steps > 1
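
Net effect of the scheduler.py change, shown as a minimal standalone sketch: with num_scheduler_steps and multi_step_stream_outputs removed, enabling chunked prefill always defaults max_num_batched_tokens to the shared constant instead of branching on the step count. SimpleSchedulerConfig and the constant's value below are illustrative stand-ins, not vLLM's actual class or default.

    # Minimal sketch of the simplified defaulting logic; names and the
    # constant value are assumptions for illustration only.
    from dataclasses import dataclass
    from typing import Optional

    DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048  # assumed value

    @dataclass
    class SimpleSchedulerConfig:
        max_model_len: int = 8192
        enable_chunked_prefill: bool = False
        max_num_batched_tokens: Optional[int] = None

        def __post_init__(self) -> None:
            if self.max_num_batched_tokens is None:
                if self.enable_chunked_prefill:
                    # No num_scheduler_steps branch anymore: chunked prefill
                    # starts from the shared default.
                    self.max_num_batched_tokens = DEFAULT_MAX_NUM_BATCHED_TOKENS
                else:
                    # Without chunked prefill, keep room for a full prompt if
                    # max_model_len exceeds the default.
                    self.max_num_batched_tokens = max(
                        self.max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS)

    print(SimpleSchedulerConfig(enable_chunked_prefill=True).max_num_batched_tokens)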
