Skip to content

Commit a1b9cb2

Browse files
authored
[BugFix] Fix recovery logic for sequence group (#2186)
1 parent 3a4fd5c commit a1b9cb2

File tree

2 files changed

+10
-8
lines changed

2 files changed

+10
-8
lines changed

vllm/core/block_manager.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ def __init__(
103103
def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
104104
# FIXME(woosuk): Here we assume that all sequences in the group share
105105
# the same prompt. This may not be true for preempted sequences.
106-
seq = seq_group.get_seqs()[0]
106+
seq = seq_group.get_seqs(status=SequenceStatus.WAITING)[0]
107107
num_required_blocks = len(seq.logical_token_blocks)
108108
if self.block_sliding_window is not None:
109109
num_required_blocks = min(num_required_blocks,
@@ -122,7 +122,7 @@ def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
122122
def allocate(self, seq_group: SequenceGroup) -> None:
123123
# NOTE: Here we assume that all sequences in the group have the same
124124
# prompt.
125-
seq = seq_group.get_seqs()[0]
125+
seq = seq_group.get_seqs(status=SequenceStatus.WAITING)[0]
126126

127127
# Allocate new physical token blocks that will store the prompt tokens.
128128
block_table: BlockTable = []
@@ -137,7 +137,7 @@ def allocate(self, seq_group: SequenceGroup) -> None:
137137
block_table.append(block)
138138

139139
# Assign the block table for each sequence.
140-
for seq in seq_group.get_seqs():
140+
for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
141141
self.block_tables[seq.seq_id] = block_table.copy()
142142

143143
def can_append_slot(self, seq_group: SequenceGroup) -> bool:

vllm/core/scheduler.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -139,15 +139,17 @@ def _schedule(self) -> SchedulerOutputs:
139139
while self.waiting:
140140
seq_group = self.waiting[0]
141141

142-
assert seq_group.num_seqs() == 1, (
142+
waiting_seqs = seq_group.get_seqs(
143+
status=SequenceStatus.WAITING)
144+
assert len(waiting_seqs) == 1, (
143145
"Waiting sequence group should have only one prompt "
144146
"sequence.")
145-
num_prompt_tokens = seq_group.get_seqs()[0].get_len()
147+
num_prompt_tokens = waiting_seqs[0].get_len()
146148
if num_prompt_tokens > self.prompt_limit:
147149
logger.warning(
148150
f"Input prompt ({num_prompt_tokens} tokens) is too long"
149151
f" and exceeds limit of {self.prompt_limit}")
150-
for seq in seq_group.get_seqs():
152+
for seq in waiting_seqs:
151153
seq.status = SequenceStatus.FINISHED_IGNORED
152154
ignored_seq_groups.append(seq_group)
153155
self.waiting.pop(0)
@@ -161,7 +163,7 @@ def _schedule(self) -> SchedulerOutputs:
161163
logger.warning(
162164
f"Input prompt ({num_prompt_tokens} tokens) is too long"
163165
f" and exceeds the capacity of block_manager")
164-
for seq in seq_group.get_seqs():
166+
for seq in waiting_seqs:
165167
seq.status = SequenceStatus.FINISHED_IGNORED
166168
ignored_seq_groups.append(seq_group)
167169
self.waiting.pop(0)
@@ -317,7 +319,7 @@ def free_finished_seq_groups(self) -> None:
317319

318320
def _allocate(self, seq_group: SequenceGroup) -> None:
319321
self.block_manager.allocate(seq_group)
320-
for seq in seq_group.get_seqs():
322+
for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
321323
seq.status = SequenceStatus.RUNNING
322324

323325
def _append_slot(

0 commit comments

Comments
 (0)