Skip to content

Commit 2ec8827

Browse files
authored
[Bugfix] Qwen-vl output is inconsistent in speculative decoding (#10350)
1 parent b40cf64 commit 2ec8827

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

vllm/spec_decode/batch_expansion.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ def _create_single_target_seq_group_metadata(
353 353   seq_data = seq_group_metadata.seq_data[seq_id]
354 354   prompt_token_ids = seq_data.prompt_token_ids_array
355 355   new_output_token_ids = [*seq_data.get_output_token_ids(), *token_ids]
    356 + mrope_position_delta = seq_data.mrope_position_delta
356 357
357 358   new_seq_data_dict = {
358 359   target_seq_id:
@@ -368,6 +369,7 @@ def _create_single_target_seq_group_metadata(
368 369   # the kv cache is filled by a previous batch in the batch expansion.
369 370   for data in new_seq_data_dict.values():
370 371   data.update_num_computed_tokens(data.get_len() - 1)
    372 + data.mrope_position_delta = mrope_position_delta
371 373
372 374   return SequenceGroupMetadata(
373 375   request_id=seq_group_metadata.request_id,

0 commit comments

Comments
 (0)