Skip to content

Commit 65abdfb

Browse files
committed
Call compute_input_embeddings only for prompt to save decode time
1 parent dacac74 commit 65abdfb

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

vllm/model_executor/models/internvl.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1728,10 +1728,11 @@ def forward(
1728 1728
# NOTE: In v1, inputs_embeds is always generated at model runner, this
1729 1729
# condition is for v0 compatibility.
1730 1730
elif inputs_embeds is None:
1731-
vision_embeddings = self.get_multimodal_embeddings(**kwargs)
1732-
inputs_embeds = self.get_input_embeddings(input_ids,
1733-
vision_embeddings)
1734-
input_ids = None
1731+
if not is_hpu:
1732+
vision_embeddings = self.get_multimodal_embeddings(**kwargs)
1733+
inputs_embeds = self.get_input_embeddings(input_ids,
1734+
vision_embeddings)
1735+
input_ids = None
1735 1736

1736 1737
forward_kwargs = {
1737 1738
"input_ids": input_ids,

vllm/worker/hpu_model_runner.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -739,7 +739,8 @@ def forward(self, *args, **kwargs):
739 739
if self._rotary_prepare_cos_sin is not None and not self.model_is_mrope:
740 740
self._rotary_prepare_cos_sin(
741 741
kwargs['positions'], recompute_cos_sin=self.recompute_cos_sin)
742-
if self.model_is_mrope or self.is_mm_optimized:
742+
if self.model_is_mrope or (self.is_mm_optimized
743+
and kwargs['attn_metadata'].is_prompt):
743 744
# inputs_embeds was computed on execute_model
744 745
# now we always want to use the inputs_embeds
745 746
# even if the prompt is text only
@@ -3901,7 +3902,8 @@ def try_revert_dummy_output_tokens():
3901 3902
model_input.multi_modal_kwargs.pop('image_index', None)
3902 3903

3903 3904
if not bypass_model_exec:
3904-
if self.model_is_mrope or self.is_mm_optimized:
3905+
if self.model_is_mrope or (self.is_mm_optimized
3906+
and is_prompt):
3905 3907
if ('pixel_values') in execute_model_kwargs and \
3906 3908
self.is_mm_optimized:
3907 3909
if warmup_mode and not is_pt_profiler_run:

0 commit comments

Comments
 (0)