|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +""" |
| 3 | +Run `pytest tests/entrypoints/openai/test_embedding_dimensions.py`. |
| 4 | +""" |
| 5 | + |
| 6 | +from typing import NamedTuple |
| 7 | + |
| 8 | +import openai |
| 9 | +import pytest |
| 10 | + |
| 11 | +from vllm.entrypoints.openai.protocol import EmbeddingResponse |
| 12 | + |
| 13 | +from ...utils import RemoteOpenAIServer |
| 14 | + |
| 15 | + |
class ModelInfo(NamedTuple):
    """Embedding model under test plus its expected `dimensions` support."""

    # Model identifier handed to the server and sent with each request.
    name: str
    # True when a positive custom `dimensions` value is expected to be
    # accepted for this model; False when it is expected to be rejected.
    is_matryoshka: bool
| 19 | + |
| 20 | + |
# Models exercised by the parametrized test: one flagged as not supporting
# matryoshka embeddings and one flagged as supporting them.
MODELS = [
    ModelInfo(name=model_name, is_matryoshka=supports_matryoshka)
    for model_name, supports_matryoshka in (
        ("BAAI/bge-m3", False),
        ("jinaai/jina-embeddings-v3", True),
    )
]
| 25 | + |
# The same sentence three times, sent as one batched embedding request.
input_texts = ["The chef prepared a delicious meal." for _ in range(3)]
| 29 | + |
| 30 | + |
@pytest.mark.asyncio
@pytest.mark.parametrize("model", MODELS)
async def test_validating_dimensions(model: ModelInfo):
    """Check how the embeddings endpoint validates the `dimensions` field.

    A server is started for `model.name` and embedding requests are sent
    with several `dimensions` values:

    * `None` must always succeed.
    * `16` must succeed when `model.is_matryoshka` is true, and must be
      rejected with `openai.BadRequestError` otherwise.
    * `-1` must always be rejected with `openai.BadRequestError`.
    """
    args = [
        "--task",
        "embed",
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "bfloat16",
        "--enforce-eager",
        "--max-model-len",
        "512",
        "--trust_remote_code",
    ]
    with RemoteOpenAIServer(model.name, args) as remote_server:
        client = remote_server.get_async_client()

        async def make_request(dimensions):
            """Send one embedding request and sanity-check the response."""
            embedding_response = await client.embeddings.create(
                model=model.name,
                input=input_texts,
                dimensions=dimensions,
                encoding_format="float",
            )
            # Round-trip through the server-side schema to validate the
            # shape of the payload returned by the client.
            embeddings = EmbeddingResponse.model_validate(
                embedding_response.model_dump(mode="json"))

            assert embeddings.id is not None
            # One embedding is expected per input text.
            assert len(embeddings.data) == len(input_texts)
            assert embeddings.usage.completion_tokens == 0
            assert embeddings.usage.prompt_tokens > 0
            assert embeddings.usage.total_tokens > 0

            for item in embeddings.data:
                assert len(item.embedding) > 0
                if dimensions is not None:
                    assert len(item.embedding) == dimensions

        if model.is_matryoshka:
            valid_dimensions = [None, 16]
            invalid_dimensions = [-1]
        else:
            valid_dimensions = [None]
            invalid_dimensions = [-1, 16]

        for dimensions in valid_dimensions:
            await make_request(dimensions)

        # `pytest.raises` must wrap each request individually: if the loop
        # were inside a single `pytest.raises` block, the first error would
        # exit the block and the remaining values would never be sent.
        for dimensions in invalid_dimensions:
            with pytest.raises(openai.BadRequestError):
                await make_request(dimensions)
0 commit comments