Skip to content

Commit 4f9731c

Browse files
yma11 and pi314ever authored
[XPU] Add XPU Dockerfile and related docs (#1162)
Signed-off-by: Yan Ma <yan.ma@intel.com> Signed-off-by: Daniel Huang <daniel1.huang@intel.com> Co-authored-by: Daniel Huang <daniel1.huang@intel.com>
1 parent d2f015e commit 4f9731c

File tree

4 files changed

+208
-1
lines changed

4 files changed

+208
-1
lines changed

docker/Dockerfile.xpu

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
# syntax=docker/dockerfile:1
# XPU (Intel GPU) image for vLLM / vLLM-Omni.
# Stages:
#   vllm-base        — oneAPI toolchain + Intel GPU runtime + vLLM built for XPU
#   vllm-openai      — vllm-base plus OpenAI API server extras
#   vllm-omni        — vLLM-Omni built on ${VLLM_BASE} (overridable to reuse a pre-built base)
#   vllm-omni-openai — serving entrypoint for vLLM-Omni

# Argument to configure vllm base image if pre-built
ARG VLLM_BASE=vllm-base

FROM intel/deep-learning-essentials:2025.3.2-0-devel-ubuntu24.04 AS vllm-base

# bash + pipefail from the start: the `wget | gpg` and `curl | sh` pipelines
# below would otherwise mask an upstream download failure (hadolint DL4006),
# and the `source` builtins in later RUNs require bash anyway.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /workspace/

ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/xpu"

# Register the Intel oneAPI apt repository and the intel-graphics PPA
# (the PPA carries the level-zero user-mode driver stack used below).
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
    add-apt-repository -y ppa:kobuk-team/intel-graphics

# OS-level build/runtime dependencies. `update` and `install` share one layer
# so the package index can never go stale, the list is sorted for diffability,
# the Python packages follow ARG PYTHON_VERSION instead of hard-coding 3.12,
# and the apt lists are removed in the same layer to keep the image small.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends --fix-missing \
        curl \
        ffmpeg \
        git \
        libaio-dev \
        libgl1 \
        libsm6 \
        libsndfile1 \
        libxext6 \
        lsb-release \
        numactl \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python3-pip \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Intel GPU user-mode drivers (level-zero, OpenCL ICD, ray tracing, ocloc)
# and the DPC++ compiler. Uses apt-get consistently (hadolint DL3027).
# NOTE(review): the blanket `upgrade -y` mutates the base image in place and
# hurts reproducibility — prefer bumping the base image tag instead (DL3005).
RUN apt-get update -y && apt-get upgrade -y && \
    apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd libze-intel-gpu-raytracing intel-ocloc && \
    apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
    rm -rf /var/lib/apt/lists/*

# uv-managed virtualenv; every Python install below targets /opt/venv.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
# NOTE(review): unpinned `curl | sh` installer — consider pinning a uv version
# (e.g. .../uv/0.x.y/install.sh) and verifying a checksum for reproducibility.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# This oneCCL contains the BMG support which is not the case for the default
# version of oneAPI 2025.2; the installer is removed in the same layer, and the
# `latest` symlink is repointed so downstream tooling resolves this version.
ARG ONECCL_INSTALLER="intel-oneccl-2021.15.7.8_offline.sh"
RUN wget "https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/${ONECCL_INSTALLER}" && \
    bash "${ONECCL_INSTALLER}" -a --silent --eula accept && \
    rm "${ONECCL_INSTALLER}" && \
    echo "source /opt/intel/oneapi/setvars.sh --force" >> /root/.bashrc && \
    echo "source /opt/intel/oneapi/ccl/2021.15/env/vars.sh --force" >> /root/.bashrc
RUN rm -f /opt/intel/oneapi/ccl/latest && \
    ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest

# Interactive shells pick up the oneAPI environment via /root/.bashrc.
CMD ["bash", "-c", "source /root/.bashrc && exec bash"]

WORKDIR /workspace/
ENV UV_HTTP_TIMEOUT=500

# Configure package index for XPU (PyTorch XPU wheels).
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"

ARG VLLM_VERSION=v0.16.0
RUN git clone -b ${VLLM_VERSION} https://github.com/vllm-project/vllm
WORKDIR /workspace/vllm

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/xpu.txt

# used for suffix method speculative decoding:
# build deps for proto + nanobind-based extensions to set up the build environment
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install grpcio-tools protobuf nanobind
# arctic-inference is built from source which needs torch-xpu properly installed
# first; CMAKE_PREFIX_PATH lets its CMake build find the torch in site-packages.
RUN --mount=type=cache,target=/root/.cache/uv \
    source /opt/intel/oneapi/setvars.sh --force && \
    source /opt/intel/oneapi/ccl/2021.15/env/vars.sh --force && \
    export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH}" && \
    uv pip install --no-build-isolation arctic-inference==0.1.1

ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/"

ENV VLLM_TARGET_DEVICE=xpu
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn

# Build and install vLLM itself for the XPU target.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --no-build-isolation .

CMD ["/bin/bash"]

FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install accelerate hf_transfer pytest pytest_asyncio lm_eval[api] modelscope

# install development dependencies (for testing)
RUN uv pip install -e tests/vllm_test_utils

# install nixl from source code
ENV NIXL_VERSION=0.7.0
RUN python /workspace/vllm/tools/install_nixl_from_source_ubuntu.py

# ensure vllm is properly installed
RUN python -c "import vllm, inspect; print(vllm.__file__)"
RUN uv pip show vllm

CMD ["/bin/bash"]

ENTRYPOINT []

FROM ${VLLM_BASE} AS vllm-omni

WORKDIR /workspace/vllm-omni
COPY . .

ENV VLLM_OMNI_TARGET_DEVICE=xpu
RUN uv pip install --no-cache-dir ".[dev]" --no-build-isolation

# FIX triton: swap any bundled triton for the matching XPU build.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip uninstall triton triton-xpu && \
    uv pip install triton-xpu==3.6.0

# remove torch bundled oneccl to avoid conflicts with the system oneCCL above
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip uninstall oneccl oneccl-devel

FROM vllm-omni AS vllm-omni-openai

RUN ln -sf /usr/bin/python3 /usr/bin/python

ENV VLLM_WORKER_MULTIPROC_METHOD=spawn

ENTRYPOINT ["vllm", "serve", "--omni"]

docs/getting_started/installation/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ vLLM-Omni supports the following hardware platforms:
55
- [GPU](gpu.md)
66
- [NVIDIA CUDA](gpu.md)
77
- [AMD ROCm](gpu.md)
8+
- [Intel XPU](gpu.md)
89
- [NPU](npu.md)

docs/getting_started/installation/gpu.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
1818

1919
--8<-- "docs/getting_started/installation/gpu/rocm.inc.md:requirements"
2020

21+
=== "Intel XPU"
22+
23+
--8<-- "docs/getting_started/installation/gpu/xpu.inc.md:requirements"
24+
2125
## Set up using Python
2226

2327
### Create a new Python environment
@@ -32,11 +36,14 @@ Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.1
3236

3337
--8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-wheels"
3438

35-
3639
=== "AMD ROCm"
3740

3841
--8<-- "docs/getting_started/installation/gpu/rocm.inc.md:pre-built-wheels"
3942

43+
=== "Intel XPU"
44+
45+
--8<-- "docs/getting_started/installation/gpu/xpu.inc.md:pre-built-wheels"
46+
4047
[](){ #build-from-source }
4148

4249
### Build wheel from source
@@ -49,6 +56,10 @@ Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.1
4956

5057
--8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-wheel-from-source"
5158

59+
=== "Intel XPU"
60+
61+
--8<-- "docs/getting_started/installation/gpu/xpu.inc.md:build-wheel-from-source"
62+
5263
## Set up using Docker
5364

5465
### Pre-built images
@@ -61,8 +72,16 @@ Note: Pre-built wheels are currently only available for vLLM-Omni 0.11.0rc1, 0.1
6172

6273
--8<-- "docs/getting_started/installation/gpu/rocm.inc.md:pre-built-images"
6374

75+
=== "Intel XPU"
76+
77+
--8<-- "docs/getting_started/installation/gpu/xpu.inc.md:pre-built-images"
78+
6479
### Build your own docker image
6580

6681
=== "AMD ROCm"
6782

6883
--8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-docker"
84+
85+
=== "Intel XPU"
86+
87+
--8<-- "docs/getting_started/installation/gpu/xpu.inc.md:build-docker"
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# --8<-- [start:requirements]

- GPU: Validated on Intel® Arc™ B-Series.

# --8<-- [end:requirements]
# --8<-- [start:set-up-using-python]

vLLM-Omni currently recommends using the Docker image setup steps below.

# --8<-- [end:set-up-using-python]
# --8<-- [start:pre-built-wheels]

# --8<-- [end:pre-built-wheels]

# --8<-- [start:build-wheel-from-source]

# --8<-- [end:build-wheel-from-source]

# --8<-- [start:build-docker]

#### Build docker image

```bash
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.xpu -t vllm-omni-xpu --shm-size=4g .
```

#### Launch the docker image

##### Launch with OpenAI API Server

```bash
docker run -it -d --shm-size 10g \
    --name {container_name} \
    --net=host \
    --ipc=host \
    --privileged \
    -v /dev/dri/by-path:/dev/dri/by-path \
    --device /dev/dri:/dev/dri \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=$HF_TOKEN" \
    vllm-omni-xpu \
    --model Qwen/Qwen2.5-Omni-3B --port 8091
```

# --8<-- [end:build-docker]

# --8<-- [start:pre-built-images]

# --8<-- [end:pre-built-images]

0 commit comments

Comments
 (0)