Commit ee06c34

Merge remote-tracking branch 'upstream/release'
2 parents: fa12712 + 65c7b70

23 files changed: +953 −34 lines

.buildkite/test-pipeline.yaml
Lines changed: 9 additions & 1 deletion

@@ -113,7 +113,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/correctness/
   - pytest -v -s entrypoints/test_chat_utils.py
   - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

@@ -328,6 +328,14 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - bash ./run-tests.sh -c configs/models-small.txt -t 1

+- label: OpenAI API correctness
+  source_file_dependencies:
+  - csrc/
+  - vllm/entrypoints/openai/
+  - vllm/model_executor/models/whisper.py
+  commands: # LMEval+Transcription WER check
+  - pytest -s entrypoints/openai/correctness/
+
 - label: Encoder Decoder tests # 5min
   source_file_dependencies:
   - vllm/
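The new pipeline step gates transcription quality on word error rate (WER). As a rough, self-contained sketch of the metric (the suite itself relies on the jiwer package added below in requirements-test.in; all names here are illustrative), WER is the word-level edit distance between hypothesis and reference, divided by the number of reference words:

```python
def wer(reference: str, hypothesis: str) -> float:
    """Word error rate: word-level edit distance / reference word count."""
    ref, hyp = reference.split(), hypothesis.split()
    # Levenshtein distance over words (single-row dynamic program).
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        cur = [i]
        for j, h in enumerate(hyp, 1):
            cur.append(min(prev[j] + 1,               # delete ref word
                           cur[j - 1] + 1,            # insert hyp word
                           prev[j - 1] + (r != h)))   # substitute
        prev = cur
    return prev[-1] / len(ref)

# One substitution out of nine reference words -> WER of 1/9.
score = wer("mary had a little lamb whose fleece was white",
            "mary had a little lamb whose fleece is white")
print(f"WER = {score:.3f}")
```

A CI check of this shape would typically assert that the corpus-level score stays below some fixed threshold.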

Dockerfile.rocm.ubi
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 ## Global Args ##################################################################
-ARG BASE_UBI_IMAGE_TAG=9.5-1738816775
+ARG BASE_UBI_IMAGE_TAG=9.5-1739420147
 ARG PYTHON_VERSION=3.12
 # Default ROCm ARCHes to build vLLM for.
 ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"

Dockerfile.ubi
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 ## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.5-1738816775
+ARG BASE_UBI_IMAGE_TAG=9.5-1739420147
 ARG PYTHON_VERSION=3.12

 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"

docs/source/serving/openai_compatible_server.md
Lines changed: 13 additions & 0 deletions

@@ -41,6 +41,8 @@ We currently support the following OpenAI APIs:
 - *Note: `parallel_tool_calls` and `user` parameters are ignored.*
 - [Embeddings API](#embeddings-api) (`/v1/embeddings`)
   - Only applicable to [embedding models](../models/pooling_models.md) (`--task embed`).
+- [Transcriptions API](#transcriptions-api) (`/v1/audio/transcriptions`)
+  - Only applicable to Automatic Speech Recognition (ASR) models (OpenAI Whisper) (`--task generate`).

 In addition, we have the following custom APIs:

@@ -296,6 +298,17 @@ For chat-like input (i.e. if `messages` is passed), these extra parameters are s
 :end-before: end-chat-embedding-extra-params
 :::

+(transcriptions-api)=
+
+### Transcriptions API
+
+Our Transcriptions API is compatible with [OpenAI's Transcriptions API](https://platform.openai.com/docs/api-reference/audio/createTranscription);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+
+<!-- TODO: api enforced limits + uploading audios -->
+
+Code example: <gh-file:examples/online_serving/openai_transcription_client.py>
+
 (tokenizer-api)=

 ### Tokenizer API
examples/online_serving/openai_transcription_client.py (new file, per the docs reference above)
Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+from openai import OpenAI
+
+from vllm.assets.audio import AudioAsset
+
+mary_had_lamb = AudioAsset('mary_had_lamb').get_local_path()
+winning_call = AudioAsset('winning_call').get_local_path()
+
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+client = OpenAI(
+    api_key=openai_api_key,
+    base_url=openai_api_base,
+)
+with open(str(mary_had_lamb), "rb") as f:
+    transcription = client.audio.transcriptions.create(
+        file=f,
+        model="openai/whisper-large-v3",
+        language="en",
+        response_format="text",
+        temperature=0.0)
+print("transcription result:", transcription)
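The example above assumes an OpenAI-compatible vLLM server is already listening on localhost:8000; one plausible way to start one (same model name as in the example, hardware permitting) is:

```shell
# Launch an OpenAI-compatible vLLM server hosting Whisper on port 8000.
vllm serve openai/whisper-large-v3
```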

requirements-common.txt
Lines changed: 3 additions & 4 deletions

@@ -8,12 +8,11 @@ py-cpuinfo
 transformers >= 4.48.2 # Required for Bamba model and Transformers backend.
 tokenizers >= 0.19.1 # Required for Llama 3.
 protobuf # Required by LlamaTokenizer.
-fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
-fastapi >= 0.107.0, != 0.113.*, != 0.114.0; python_version >= '3.9'
+fastapi[standard] >= 0.107.0, < 0.113.0; python_version < '3.9'
+fastapi[standard] >= 0.107.0, != 0.113.*, != 0.114.0; python_version >= '3.9'
 aiohttp
 openai >= 1.52.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support)
-uvicorn[standard]
-pydantic >= 2.9 # Required for fastapi >= 0.113.0
+pydantic >= 2.9
 prometheus_client >= 0.18.0
 pillow # Required for image processing
 prometheus-fastapi-instrumentator >= 7.0.0

requirements-test.in
Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ pqdm
 ray[adag]==2.40.0
 sentence-transformers # required for embedding tests
 soundfile # required for audio tests
+jiwer # required for audio tests
 timm # required for internvl test
 torch==2.5.1
 torchaudio==2.5.1

requirements-test.txt
Lines changed: 5 additions & 0 deletions

@@ -66,6 +66,7 @@ charset-normalizer==3.4.0
 click==8.1.7
     # via
     #   black
+    #   jiwer
     #   nltk
     #   ray
 colorama==0.4.6

@@ -187,6 +188,8 @@ jinja2==3.1.4
     # via
     #   datamodel-code-generator
     #   torch
+jiwer==3.0.5
+    # via -r requirements-test.in
 jmespath==1.0.1
     # via
     #   boto3

@@ -470,6 +473,8 @@ pyyaml==6.0.2
     #   timm
     #   transformers
     #   vocos
+rapidfuzz==3.12.1
+    # via jiwer
 ray[adag]==2.40.0
     # via -r requirements-test.in
 redis==5.2.0

tests/entrypoints/openai/correctness/__init__.py

Whitespace-only changes.

tests/entrypoints/openai/test_accuracy.py renamed to tests/entrypoints/openai/correctness/test_lmeval.py
Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@

 from vllm.platforms import current_platform

-from ...utils import RemoteOpenAIServer
+from ....utils import RemoteOpenAIServer

 MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
 NUM_CONCURRENT = 500
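The extra leading dot reflects the rename: the test moved one directory deeper, and each directory level between a module and the package it imports from adds one `.` to a relative import. A small self-contained sketch (hypothetical package names, built in a temp dir purely for illustration):

```python
import importlib
import os
import sys
import tempfile

root = tempfile.mkdtemp()

def write(relpath: str, body: str = "") -> None:
    """Create a file (and parent dirs) under the temp package root."""
    path = os.path.join(root, relpath)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        f.write(body)

write("demo_pkg/__init__.py")
write("demo_pkg/utils.py", "MARKER = 'utils'\n")
write("demo_pkg/a/__init__.py")
write("demo_pkg/a/b/__init__.py")
# A module in demo_pkg/a/b needs three dots to reach demo_pkg.utils ...
write("demo_pkg/a/b/mod.py", "from ...utils import MARKER\n")
write("demo_pkg/a/b/c/__init__.py")
# ... and one directory deeper, four dots (the change in this commit).
write("demo_pkg/a/b/c/mod.py", "from ....utils import MARKER\n")

sys.path.insert(0, root)
shallow = importlib.import_module("demo_pkg.a.b.mod")
deep = importlib.import_module("demo_pkg.a.b.c.mod")
print(shallow.MARKER, deep.MARKER)  # both resolve to demo_pkg.utils
```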
