14
14
from vllm .outputs import RequestOutput
15
15
from vllm .sampling_params import SamplingParams
16
16
from vllm .sequence import (SamplerOutput , Sequence , SequenceGroup ,
17
- SequenceGroupMetadata , SequenceGroupOutput ,
18
- SequenceOutput , SequenceStatus )
17
+ SequenceGroupOutput , SequenceOutput , SequenceStatus )
19
18
from vllm .transformers_utils .tokenizer import (detokenize_incrementally ,
20
19
get_tokenizer )
21
20
from vllm .utils import Counter
@@ -328,16 +327,6 @@ def has_unfinished_requests(self) -> bool:
328
327
"""Returns True if there are unfinished requests."""
329
328
return self .scheduler .has_unfinished_seqs ()
330
329
331
- def _schedule (
332
- self
333
- ) -> Tuple [List [SequenceGroupMetadata ], SchedulerOutputs ,
334
- List [RequestOutput ]]:
335
- seq_group_metadata_list , scheduler_outputs = self .scheduler .schedule ()
336
- return seq_group_metadata_list , scheduler_outputs , [
337
- RequestOutput .from_seq_group (seq_group )
338
- for seq_group in scheduler_outputs .ignored_seq_groups
339
- ]
340
-
341
330
def _check_beam_search_early_stopping (
342
331
self ,
343
332
early_stopping : Union [bool , str ],
@@ -586,9 +575,7 @@ def step(self) -> List[RequestOutput]:
586
575
and updates the scheduler with the model outputs. Finally, it decodes
587
576
the sequences and returns the newly generated results.
588
577
"""
589
- seq_group_metadata_list , scheduler_outputs , ignored = self ._schedule ()
590
- if scheduler_outputs .is_empty ():
591
- return ignored
578
+ seq_group_metadata_list , scheduler_outputs = self .scheduler .schedule ()
592
579
593
580
# Execute the model.
594
581
output = self ._run_workers (
@@ -597,7 +584,7 @@ def step(self) -> List[RequestOutput]:
597
584
blocks_to_swap_in = scheduler_outputs .blocks_to_swap_in ,
598
585
blocks_to_swap_out = scheduler_outputs .blocks_to_swap_out ,
599
586
blocks_to_copy = scheduler_outputs .blocks_to_copy ,
600
- )
587
+ ) if not scheduler_outputs . is_empty () else []
601
588
602
589
return self ._process_model_outputs (output , scheduler_outputs )
603
590
0 commit comments