[New Model]: Snowflake Arctic Embed (Family) (vllm-project#16649)

noooop · web-flow · commit 3d3ab3689f19 · 2025-04-18T08:11:57.000-07:00
diff --git a/tests/entrypoints/openai/test_embedding_dimensions.py b/tests/entrypoints/openai/test_embedding_dimensions.py
@@ -3,24 +3,17 @@
 Run `pytest tests/entrypoints/openai/test_embedding_dimensions.py`.
 """
 
-from typing import NamedTuple
-
 import openai
 import pytest
 
 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 
+from ...models.embedding.utils import EmbedModelInfo
 from ...utils import RemoteOpenAIServer
 
-
-class ModelInfo(NamedTuple):
-    name: str
-    is_matryoshka: bool
-
-
 MODELS = [
-    ModelInfo(name="BAAI/bge-m3", is_matryoshka=False),
-    ModelInfo(name="jinaai/jina-embeddings-v3", is_matryoshka=True),
+    EmbedModelInfo(name="BAAI/bge-m3", is_matryoshka=False),
+    EmbedModelInfo(name="jinaai/jina-embeddings-v3", is_matryoshka=True),
 ]
 
 input_texts = [
@@ -30,7 +23,7 @@ class ModelInfo(NamedTuple):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model", MODELS)
-async def test_validating_dimensions(model: ModelInfo):
+async def test_validating_dimensions(model: EmbedModelInfo):
     args = [
         "--task",
         "embed",
diff --git a/tests/models/embedding/language/test_snowflake_arctic_embed.py b/tests/models/embedding/language/test_snowflake_arctic_embed.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Compare the embedding outputs of HF and vLLM models.
+
+Run `pytest tests/models/embedding/language/test_snowflake_arctic_embed.py`.
+"""
+import pytest
+
+from tests.models.embedding.utils import EmbedModelInfo
+
+from ..utils import check_embeddings_close
+
+EMBEDDING_PROMPTS = [
+    'what is snowflake?', 'Where can I get the best tacos?', 'The Data Cloud!',
+    'Mexico City of Course!'
+]
+
+MODELS = [
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-xs",
+                   is_matryoshka=False,
+                   architecture="BertModel",
+                   enable_test=True),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-s",
+                   is_matryoshka=False,
+                   architecture="BertModel",
+                   enable_test=False),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-m",
+                   is_matryoshka=False,
+                   architecture="BertModel",
+                   enable_test=False),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-long",
+                   is_matryoshka=False,
+                   architecture="NomicBertModel",
+                   enable_test=True),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-l",
+                   is_matryoshka=False,
+                   architecture="BertModel",
+                   enable_test=False),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5",
+                   is_matryoshka=True,
+                   architecture="BertModel",
+                   enable_test=True),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-l-v2.0",
+                   is_matryoshka=True,
+                   architecture="XLMRobertaModel",
+                   enable_test=True),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
+                   is_matryoshka=True,
+                   architecture="GteModel",
+                   enable_test=True),
+]
+
+
+@pytest.mark.parametrize("model_info", MODELS)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_models(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model_info: EmbedModelInfo,
+    dtype: str,
+    monkeypatch,
+) -> None:
+    if not model_info.enable_test:
+        # A model family has many models with the same architecture,
+        # and we don't need to test each one.
+        pytest.skip("Skipping test.")
+
+    example_prompts = example_prompts + EMBEDDING_PROMPTS
+
+    vllm_extra_kwargs = {
+        "hf_overrides": {
+            "is_matryoshka": model_info.is_matryoshka
+        }
+    }
+
+    with hf_runner(model_info.name, dtype=dtype,
+                   is_sentence_transformer=True) as hf_model:
+        hf_outputs = hf_model.encode(example_prompts)
+
+    with vllm_runner(model_info.name,
+                     task="embed",
+                     dtype=dtype,
+                     max_model_len=None,
+                     **vllm_extra_kwargs) as vllm_model:
+
+        assert (vllm_model.model.llm_engine.model_config.is_matryoshka ==
+                model_info.is_matryoshka)
+
+        if model_info.architecture:
+            assert (model_info.architecture
+                    in vllm_model.model.llm_engine.model_config.architectures)
+
+        vllm_outputs = vllm_model.encode(example_prompts)
+
+    check_embeddings_close(
+        embeddings_0_lst=hf_outputs,
+        embeddings_1_lst=vllm_outputs,
+        name_0="hf",
+        name_1="vllm",
+        tol=1e-2,
+    )
diff --git a/tests/models/embedding/utils.py b/tests/models/embedding/utils.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from collections.abc import Sequence
+from typing import NamedTuple
 
 import torch
 import torch.nn.functional as F
@@ -37,3 +38,10 @@ def matryoshka_fy(tensor, dimensions):
     tensor = tensor[..., :dimensions]
     tensor = F.normalize(tensor, p=2, dim=1)
     return tensor
+
+
+class EmbedModelInfo(NamedTuple):
+    name: str
+    is_matryoshka: bool
+    architecture: str = ""
+    enable_test: bool = True
diff --git a/tests/models/registry.py b/tests/models/registry.py
@@ -247,11 +247,15 @@ def check_available_online(
     "BertModel": _HfExamplesInfo("BAAI/bge-base-en-v1.5"),
     "Gemma2Model": _HfExamplesInfo("BAAI/bge-multilingual-gemma2"),
     "GritLM": _HfExamplesInfo("parasail-ai/GritLM-7B-vllm"),
+    "GteModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
+                                               trust_remote_code=True),
     "InternLM2ForRewardModel": _HfExamplesInfo("internlm/internlm2-1_8b-reward",
                                                trust_remote_code=True),
     "JambaForSequenceClassification": _HfExamplesInfo("ai21labs/Jamba-tiny-reward-dev"),  # noqa: E501
     "LlamaModel": _HfExamplesInfo("llama", is_available_online=False),
     "MistralModel": _HfExamplesInfo("intfloat/e5-mistral-7b-instruct"),
+    "NomicBertModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-long",  # noqa: E501
+                                               trust_remote_code=True),
     "Qwen2Model": _HfExamplesInfo("ssmits/Qwen2-7B-Instruct-embed-base"),
     "Qwen2ForRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-RM-72B"),
     "Qwen2ForProcessRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-PRM-7B"),
diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py
@@ -354,6 +354,7 @@ def get_act_fn(act_fn_name: str) -> nn.Module:
 _ACTIVATION_AND_MUL_REGISTRY = LazyDict({
     "gelu": lambda: GeluAndMul(),
     "silu": lambda: SiluAndMul(),
+    "gelu_and_mul": lambda: GeluAndMul(),
 })
 
 
diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py