
Commit f23b4c0

Authored by varun-sundar-rabindranath and Varun Sundar Rabindranath
[BugFix] Pad input buffers in _dummy_run (#26209)
Signed-off-by: Varun Sundar Rabindranath <[email protected]>
Co-authored-by: Varun Sundar Rabindranath <[email protected]>
1 parent 9354095 · commit f23b4c0

File tree

1 file changed (+10, -8 lines)


vllm/v1/worker/gpu_model_runner.py

Lines changed: 10 additions & 8 deletions
```diff
@@ -3434,26 +3434,28 @@ def _dummy_run(
         with self.maybe_dummy_run_with_lora(
             self.lora_config, num_scheduled_tokens, remove_lora
         ):
-            model_kwargs = self._init_model_kwargs(num_tokens)
+            # Make sure padding doesn't exceed max_num_tokens
+            assert num_tokens_after_padding <= self.max_num_tokens
+            model_kwargs = self._init_model_kwargs(num_tokens_after_padding)
             if self.supports_mm_inputs and not self.model_config.is_encoder_decoder:
                 input_ids = None
-                inputs_embeds = self.inputs_embeds.gpu[:num_tokens]
+                inputs_embeds = self.inputs_embeds.gpu[:num_tokens_after_padding]
                 model_kwargs = {
                     **model_kwargs,
                     **self._dummy_mm_kwargs(num_reqs),
                 }
             elif self.enable_prompt_embeds:
                 input_ids = None
-                inputs_embeds = self.inputs_embeds.gpu[:num_tokens]
-                model_kwargs = self._init_model_kwargs(num_tokens)
+                inputs_embeds = self.inputs_embeds.gpu[:num_tokens_after_padding]
+                model_kwargs = self._init_model_kwargs(num_tokens_after_padding)
             else:
-                input_ids = self.input_ids.gpu[:num_tokens]
+                input_ids = self.input_ids.gpu[:num_tokens_after_padding]
                 inputs_embeds = None
 
             if self.uses_mrope:
-                positions = self.mrope_positions.gpu[:, :num_tokens]
+                positions = self.mrope_positions.gpu[:, :num_tokens_after_padding]
             else:
-                positions = self.positions.gpu[:num_tokens]
+                positions = self.positions.gpu[:num_tokens_after_padding]
 
             if get_pp_group().is_first_rank:
                 intermediate_tensors = None
@@ -3468,7 +3470,7 @@ def _dummy_run(
                 )
 
             intermediate_tensors = self.sync_and_slice_intermediate_tensors(
-                num_tokens, None, False
+                num_tokens_after_padding, None, False
             )
 
             # filter out the valid batch descriptor
```
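For context, here is a minimal sketch of the idea behind this fix. When the token count is rounded up to a padded size (e.g. for CUDA-graph capture buckets), the dummy run must slice its persistent input buffers to the padded length, with an assert guarding that the padded length never exceeds the preallocated capacity. The `pad_to_bucket` helper, `make_dummy_inputs` function, and buffer layout below are illustrative assumptions, not vLLM's actual API:

```python
# Illustrative sketch only -- not the real gpu_model_runner code.
import torch


def pad_to_bucket(num_tokens: int, bucket: int = 8) -> int:
    """Round num_tokens up to the next multiple of `bucket` (hypothetical helper)."""
    return ((num_tokens + bucket - 1) // bucket) * bucket


def make_dummy_inputs(num_tokens: int, max_num_tokens: int = 64):
    # Persistent buffers preallocated for the worst case.
    input_ids_buf = torch.zeros(max_num_tokens, dtype=torch.long)
    positions_buf = torch.zeros(max_num_tokens, dtype=torch.long)

    num_tokens_after_padding = pad_to_bucket(num_tokens)
    # Make sure padding doesn't exceed the preallocated capacity,
    # mirroring the assert added in this commit.
    assert num_tokens_after_padding <= max_num_tokens

    # Slice to the padded size, not the raw size, so the buffer shapes
    # match what a padded (e.g. graph-captured) forward pass will use.
    input_ids = input_ids_buf[:num_tokens_after_padding]
    positions = positions_buf[:num_tokens_after_padding]
    return input_ids, positions


ids, pos = make_dummy_inputs(num_tokens=13)
print(ids.shape, pos.shape)  # torch.Size([16]) torch.Size([16])
```

Slicing to the raw `num_tokens` instead of `num_tokens_after_padding`, as the old code did, would hand the dummy run buffers whose shapes differ from the padded sizes used elsewhere, which is presumably the mismatch this commit removes.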
