Skip to content

Commit ad8d696

Browse files
authored
[Core] Scheduler perf fix (#4270)
1 parent 3d92516 commit ad8d696

File tree

2 files changed

+11
-14
lines changed

2 files changed

+11
-14
lines changed

tests/core/test_scheduler.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -540,7 +540,7 @@ def test_decode_schedule_preempted():
540540
curr_loras = None
541541
for i in range(3):
542542
_, seq_group = create_dummy_prompt(str(i), prompt_length=60)
543-
scheduler._allocate_and_set_running(seq_group, 60)
543+
scheduler._allocate_and_set_running(seq_group)
544544
append_new_token_seq_group(60, seq_group, 1)
545545
running.append(seq_group)
546546
scheduler.block_manager.can_append_slots = MagicMock()
@@ -581,7 +581,7 @@ def test_decode_swap_beam_search():
581581
budget = create_token_budget()
582582
for i in range(3):
583583
_, seq_group = create_dummy_prompt(str(i), prompt_length=60, best_of=2)
584-
scheduler._allocate_and_set_running(seq_group, 60)
584+
scheduler._allocate_and_set_running(seq_group)
585585
running.append(seq_group)
586586
append_new_token_seq_group(60, seq_group, 1)
587587
budget.add_num_seqs(seq_group.request_id,
@@ -629,7 +629,7 @@ def test_schedule_decode_blocks_to_copy_update():
629629
running = deque()
630630
policy = PolicyFactory.get_policy(policy_name="fcfs")
631631
curr_loras = None
632-
scheduler._allocate_and_set_running(seq_group, 60)
632+
scheduler._allocate_and_set_running(seq_group)
633633
append_new_token_seq_group(60, seq_group, 1)
634634
running.append(seq_group)
635635

@@ -659,7 +659,7 @@ def test_schedule_swapped_simple():
659659
curr_loras = None
660660
blocks_to_swap_out = {}
661661
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
662-
scheduler._allocate_and_set_running(seq_group, 60)
662+
scheduler._allocate_and_set_running(seq_group)
663663
append_new_token_seq_group(60, seq_group, 1)
664664
scheduler._swap_out(seq_group, blocks_to_swap_out)
665665
swapped.append(seq_group)
@@ -687,7 +687,7 @@ def test_schedule_swapped_max_token_budget():
687687
blocks_to_swap_out = {}
688688
for _ in range(2):
689689
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
690-
scheduler._allocate_and_set_running(seq_group, 60)
690+
scheduler._allocate_and_set_running(seq_group)
691691
append_new_token_seq_group(60, seq_group, 1)
692692
scheduler._swap_out(seq_group, blocks_to_swap_out)
693693
swapped.append(seq_group)
@@ -721,7 +721,7 @@ def test_schedule_swapped_max_seqs():
721721
blocks_to_swap_out = {}
722722
for i in range(4):
723723
_, seq_group = create_dummy_prompt(str(i), prompt_length=60)
724-
scheduler._allocate_and_set_running(seq_group, 60)
724+
scheduler._allocate_and_set_running(seq_group)
725725
append_new_token_seq_group(60, seq_group, 1)
726726
scheduler._swap_out(seq_group, blocks_to_swap_out)
727727
swapped.append(seq_group)
@@ -759,7 +759,7 @@ def test_schedule_swapped_max_loras():
759759
lora_name=str(i),
760760
lora_int_id=i + 1,
761761
lora_local_path="abc"))
762-
scheduler._allocate_and_set_running(seq_group, 60)
762+
scheduler._allocate_and_set_running(seq_group)
763763
append_new_token_seq_group(60, seq_group, 1)
764764
scheduler._swap_out(seq_group, blocks_to_swap_out)
765765
swapped.append(seq_group)
@@ -783,7 +783,7 @@ def test_schedule_swapped_cannot_swap_in():
783783
blocks_to_swap_out = {}
784784
for _ in range(2):
785785
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
786-
scheduler._allocate_and_set_running(seq_group, 60)
786+
scheduler._allocate_and_set_running(seq_group)
787787
append_new_token_seq_group(60, seq_group, 1)
788788
scheduler._swap_out(seq_group, blocks_to_swap_out)
789789
swapped.append(seq_group)
@@ -808,7 +808,7 @@ def test_schedule_swapped_blocks_to_copy():
808808
policy = PolicyFactory.get_policy(policy_name="fcfs")
809809
curr_loras = None
810810
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
811-
scheduler._allocate_and_set_running(seq_group, 60)
811+
scheduler._allocate_and_set_running(seq_group)
812812
append_new_token_seq_group(60, seq_group, 1)
813813
blocks_to_swap_out = {}
814814
scheduler._swap_out(seq_group, blocks_to_swap_out)

vllm/core/scheduler.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -297,7 +297,6 @@ def num_decoding_tokens_per_seq(self) -> int:
297297

298298
def add_seq_group(self, seq_group: SequenceGroup) -> None:
299299
# Add sequence groups to the waiting queue.
300-
logger.debug(f"add_seq_group {seq_group.request_id}")
301300
self.waiting.append(seq_group)
302301

303302
def abort_seq_group(self, request_id: Union[str, Iterable[str]]) -> None:
@@ -427,7 +426,6 @@ def _schedule_running(
427426
swapped_out.append(seq_group)
428427
break
429428
else:
430-
logger.debug(f"append slot for {seq_group}")
431429
self._append_slots(seq_group, blocks_to_copy)
432430
is_prefill = seq_group.is_prefill()
433431
if is_prefill:
@@ -659,7 +657,7 @@ def _schedule_prefills(
659657
if curr_loras is not None and lora_int_id > 0:
660658
curr_loras.add(lora_int_id)
661659
waiting_queue.popleft()
662-
self._allocate_and_set_running(seq_group, num_new_tokens)
660+
self._allocate_and_set_running(seq_group)
663661
seq_groups.append(
664662
ScheduledSequenceGroup(seq_group=seq_group,
665663
token_chunk_size=num_new_tokens))
@@ -952,8 +950,7 @@ def free_finished_seq_groups(self) -> None:
952950
self.running = deque(seq_group for seq_group in self.running
953951
if not seq_group.is_finished())
954952

955-
def _allocate_and_set_running(self, seq_group: SequenceGroup,
956-
num_new_tokens: int) -> None:
953+
def _allocate_and_set_running(self, seq_group: SequenceGroup) -> None:
957954
self.block_manager.allocate(seq_group)
958955
for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
959956
seq.status = SequenceStatus.RUNNING

0 commit comments

Comments
 (0)