Skip to content

Commit 1b67b04

Browse files
[Misc] Remove more get_input_embeddings_v0 (vllm-project#25857)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent bd51f78 commit 1b67b04

File tree

4 files changed: +7 additions, -83 deletions

vllm/model_executor/models/gemma3n_mm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@
4545
from .interfaces import (MultiModalEmbeddings, SupportsMultiModal,
4646
SupportsTranscription)
4747
from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
48-
init_vllm_registered_model, maybe_prefix,
49-
merge_multimodal_embeddings)
48+
init_vllm_registered_model, maybe_prefix)
5049

5150
logger = init_logger(__name__)
5251

vllm/model_executor/models/keye.py

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
from vllm.multimodal.profiling import BaseDummyInputsBuilder
4242
from vllm.platforms import _Backend
4343
from vllm.sequence import IntermediateTensors
44-
from vllm.transformers_utils.config import uses_mrope
4544
from vllm.utils import is_list_of
4645
from vllm.utils.tensor_schema import TensorSchema, TensorShape
4746

@@ -50,7 +49,7 @@
5049
from .siglip import SiglipMLP
5150
from .utils import (AutoWeightsLoader, WeightsMapper,
5251
init_vllm_registered_model, is_pp_missing_parameter,
53-
maybe_prefix, merge_multimodal_embeddings)
52+
maybe_prefix)
5453
from .vision import get_vit_attn_backend
5554

5655
logger = init_logger(__name__)
@@ -1450,32 +1449,6 @@ def get_multimodal_embeddings(
14501449
multimodal_embeddings += video_embeddings
14511450
return multimodal_embeddings
14521451

1453-
def get_input_embeddings_v0(
1454-
self,
1455-
input_ids: torch.Tensor,
1456-
image_input: Optional[Any] = None,
1457-
video_input: Optional[Any] = None,
1458-
) -> torch.Tensor:
1459-
inputs_embeds = self.get_input_embeddings(input_ids)
1460-
if image_input is not None:
1461-
image_embeds = self._process_image_input(image_input)
1462-
inputs_embeds = merge_multimodal_embeddings(
1463-
input_ids,
1464-
inputs_embeds,
1465-
image_embeds,
1466-
placeholder_token_id=self.config.image_token_id,
1467-
)
1468-
1469-
if video_input is not None:
1470-
video_embeds = self._process_video_input(video_input)
1471-
inputs_embeds = merge_multimodal_embeddings(
1472-
input_ids,
1473-
inputs_embeds,
1474-
video_embeds,
1475-
placeholder_token_id=self.config.video_token_id,
1476-
)
1477-
return inputs_embeds
1478-
14791452
def forward(
14801453
self,
14811454
input_ids: torch.Tensor,
@@ -1500,23 +1473,6 @@ def forward(
15001473
if intermediate_tensors is not None:
15011474
inputs_embeds = None
15021475

1503-
elif inputs_embeds is None:
1504-
image_input = self._parse_and_validate_image_input(**kwargs)
1505-
video_input = self._parse_and_validate_video_input(**kwargs)
1506-
if image_input is None and video_input is None:
1507-
inputs_embeds = None
1508-
else:
1509-
if uses_mrope(self.config):
1510-
assert positions.ndim == 2 and positions.size(0) == 3, (
1511-
"multimodal section rotary embedding requires "
1512-
f"(3, seq_len) positions, but got {positions.size()}")
1513-
inputs_embeds = self.get_input_embeddings_v0(
1514-
input_ids,
1515-
image_input=image_input,
1516-
video_input=video_input,
1517-
)
1518-
input_ids = None
1519-
15201476
hidden_states = self.language_model.model(
15211477
input_ids=input_ids,
15221478
positions=positions,

vllm/model_executor/models/phi4_multimodal.py

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,7 @@
4444
from .idefics2_vision_model import Idefics2VisionTransformer
4545
from .interfaces import MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal
4646
from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
47-
init_vllm_registered_model, maybe_prefix,
48-
merge_multimodal_embeddings)
49-
50-
# <|endoftext10|> (see vocab.json in hf model)
51-
_IMAGE_PLACEHOLDER_TOKEN_ID = 200010
52-
# <|endoftext11|>
53-
_AUDIO_PLACEHOLDER_TOKEN_ID = 200011
47+
init_vllm_registered_model, maybe_prefix)
5448

5549
_AUDIO_MAX_SOUNDFILE_SIZE = 241_000
5650

@@ -1371,35 +1365,6 @@ def get_multimodal_embeddings(self,
13711365

13721366
return multimodal_embeddings
13731367

1374-
def get_input_embeddings_v0(
1375-
self,
1376-
input_ids: torch.Tensor,
1377-
image_input: Optional[Phi4MMImagePixelInputs] = None,
1378-
audio_input: Optional[Phi4MMAudioFeatureInputs] = None,
1379-
) -> torch.Tensor:
1380-
audio_projection_mode = 'speech'
1381-
inputs_embeds = self.get_input_embeddings(input_ids)
1382-
if image_input is not None:
1383-
image_embeds = self._process_image_input(image_input)
1384-
inputs_embeds = merge_multimodal_embeddings(
1385-
input_ids,
1386-
inputs_embeds,
1387-
image_embeds,
1388-
placeholder_token_id=_IMAGE_PLACEHOLDER_TOKEN_ID,
1389-
)
1390-
audio_projection_mode = 'vision'
1391-
1392-
if audio_input is not None:
1393-
audio_embeds = self._process_audio_input(
1394-
audio_input, audio_projection_mode=audio_projection_mode)
1395-
inputs_embeds = merge_multimodal_embeddings(
1396-
input_ids,
1397-
inputs_embeds,
1398-
audio_embeds,
1399-
placeholder_token_id=_AUDIO_PLACEHOLDER_TOKEN_ID,
1400-
)
1401-
return inputs_embeds
1402-
14031368
def forward(
14041369
self,
14051370
input_ids: torch.Tensor,

vllm/model_executor/models/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import torch.nn as nn
1111
from torch.func import functional_call
1212
from transformers import PretrainedConfig
13+
from typing_extensions import deprecated
1314

1415
import vllm.envs as envs
1516
from vllm.config import VllmConfig
@@ -439,6 +440,9 @@ def _merge_multimodal_embeddings(
439440
return inputs_embeds
440441

441442

443+
@deprecated("`merge_multimodal_embeddings` has been replaced with "
444+
"`SupportsMultiModal.get_input_embeddings` and will be "
445+
"removed in v0.12.")
442446
def merge_multimodal_embeddings(
443447
input_ids: torch.Tensor,
444448
inputs_embeds: torch.Tensor,

0 commit comments

Comments (0)