@@ -139,15 +139,17 @@ def _schedule(self) -> SchedulerOutputs:
139
139
while self .waiting :
140
140
seq_group = self .waiting [0 ]
141
141
142
- assert seq_group .num_seqs () == 1 , (
142
+ waiting_seqs = seq_group .get_seqs (
143
+ status = SequenceStatus .WAITING )
144
+ assert len (waiting_seqs ) == 1 , (
143
145
"Waiting sequence group should have only one prompt "
144
146
"sequence." )
145
- num_prompt_tokens = seq_group . get_seqs () [0 ].get_len ()
147
+ num_prompt_tokens = waiting_seqs [0 ].get_len ()
146
148
if num_prompt_tokens > self .prompt_limit :
147
149
logger .warning (
148
150
f"Input prompt ({ num_prompt_tokens } tokens) is too long"
149
151
f" and exceeds limit of { self .prompt_limit } " )
150
- for seq in seq_group . get_seqs () :
152
+ for seq in waiting_seqs :
151
153
seq .status = SequenceStatus .FINISHED_IGNORED
152
154
ignored_seq_groups .append (seq_group )
153
155
self .waiting .pop (0 )
@@ -161,7 +163,7 @@ def _schedule(self) -> SchedulerOutputs:
161
163
logger .warning (
162
164
f"Input prompt ({ num_prompt_tokens } tokens) is too long"
163
165
f" and exceeds the capacity of block_manager" )
164
- for seq in seq_group . get_seqs () :
166
+ for seq in waiting_seqs :
165
167
seq .status = SequenceStatus .FINISHED_IGNORED
166
168
ignored_seq_groups .append (seq_group )
167
169
self .waiting .pop (0 )
@@ -317,7 +319,7 @@ def free_finished_seq_groups(self) -> None:
317
319
318
320
def _allocate(self, seq_group: SequenceGroup) -> None:
    """Allocate blocks for ``seq_group`` and mark its waiting sequences running.

    The whole group is handed to ``self.block_manager.allocate`` first.
    Afterwards, only the sequences that ``get_seqs`` returns for
    ``SequenceStatus.WAITING`` have their status set to
    ``SequenceStatus.RUNNING``; sequences the group reports under any other
    status are not touched here.

    Args:
        seq_group: The sequence group being scheduled.
    """
    self.block_manager.allocate(seq_group)

    # Fetch the waiting sequences once, then promote each of them.
    pending_seqs = seq_group.get_seqs(status=SequenceStatus.WAITING)
    for pending in pending_seqs:
        pending.status = SequenceStatus.RUNNING
322
324
323
325
def _append_slot (
0 commit comments