Skip to content

Commit ddcec28

Browse files
Fix implementation divergence for BLOOM models between vLLM and HuggingFace when using prompt embeds (vllm-project#24686)
Signed-off-by: Andrew Sansom <[email protected]>
1 parent e090b7b commit ddcec28

File tree

2 files changed

+4
-5
lines changed

2 files changed

+4
-5
lines changed

tests/models/language/generation/test_common.py

Lines changed: 2 additions & 4 deletions
```diff
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
 from typing import Optional
 
 import pytest
```
```diff
@@ -99,9 +98,10 @@
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
+@pytest.mark.parametrize("use_prompt_embeds", [True, False])
 def test_models(hf_runner, vllm_runner, example_prompts, model: str,
                 max_tokens: int, num_logprobs: int, use_rocm_aiter: bool,
-                monkeypatch) -> None:
+                use_prompt_embeds: bool, monkeypatch) -> None:
 
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")
```
```diff
@@ -119,8 +119,6 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str,
         # in parts of the operators
         pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
 
-    use_prompt_embeds = os.getenv("VLLM_USE_V1") == "0"
-
     with hf_runner(model) as hf_model:
         hf_outputs = hf_model.generate_greedy_logprobs_limit(
             example_prompts, max_tokens, num_logprobs)
```

vllm/model_executor/models/bloom.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -257,7 +257,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 config.hidden_size))
 
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.word_embeddings_layernorm(self.word_embeddings(input_ids))
+        return self.word_embeddings(input_ids)
 
     def forward(
         self,
```
```diff
@@ -271,6 +271,7 @@ def forward(
                 hidden_states = inputs_embeds
             else:
                 hidden_states = self.get_input_embeddings(input_ids)
+                hidden_states = self.word_embeddings_layernorm(hidden_states)
         else:
             assert intermediate_tensors is not None
             hidden_states = intermediate_tensors["hidden_states"]
```

0 commit comments

Comments (0)