This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit 5048126

add ubi Dockerfile

1 parent 8b8fed5 commit 5048126

File tree

1 file changed: +244 −0 lines changed

Dockerfile.ubi

Lines changed: 244 additions & 0 deletions

@@ -0,0 +1,244 @@
# Please update any changes made here to
# docs/source/dev/dockerfile-ubi/dockerfile-ubi.rst

## Global Args #################################################################
ARG BASE_UBI_IMAGE_TAG=9.4
ARG PYTHON_VERSION=3.11

ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
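# (These global args can be overridden at build time with --build-arg, e.g.
#   docker build -f Dockerfile.ubi --build-arg TORCH_CUDA_ARCH_LIST="9.0+PTX" .
# The arch list value shown is just an example; see the build sketch at the end.)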

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
ARG PYTHON_VERSION

RUN microdnf install -y \
    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
    && microdnf clean all

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Some utils for dev purposes - tar required for kubectl cp
RUN microdnf install -y \
    which procps findutils tar vim git \
    && microdnf clean all

## Python Installer ############################################################
FROM base as python-install

ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN microdnf install -y \
    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all

## CUDA Base ###################################################################
FROM python-install as cuda-base

# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
# this env var is set to 12.2.0, even though it's compatible
#ENV CUDA_VERSION=12.2.0 \
ENV CUDA_VERSION=12.0.0 \
    NV_CUDA_LIB_VERSION=12.2.0-1 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NV_CUDA_CUDART_VERSION=12.2.53-1 \
    NV_CUDA_COMPAT_VERSION=535.104.12

RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
    https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

RUN microdnf install -y \
    cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \
    cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
    && microdnf clean all

ARG CUDA_HOME="/usr/local/cuda"
ENV CUDA_HOME=${CUDA_HOME} \
    PATH="${CUDA_HOME}/bin:${PATH}" \
    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"

## CUDA Development ############################################################
FROM cuda-base as cuda-devel

ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \
    NV_NVML_DEV_VERSION=12.2.81-1 \
    NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \
    NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \
    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2

RUN microdnf install -y \
    cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \
    cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \
    cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \
    cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \
    cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \
    libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \
    libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \
    libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
    && microdnf clean all

ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-12.2/compat/
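# (ldconfig here adds the cuda-compat driver libraries installed above to the
# dynamic linker cache, so libcuda.so can be resolved inside the image.)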

## Python cuda base #################################################################
FROM cuda-devel AS python-cuda-base

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
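
# (The RUN --mount cache and bind mounts used from here on require BuildKit,
# i.e. DOCKER_BUILDKIT=1 docker build ... or docker buildx build.)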
# install cuda and common dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
    pip install \
    -r requirements-cuda.txt

## Development #################################################################
FROM python-cuda-base AS dev

# install build and runtime dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
    pip3 install \
    -r requirements-cuda.txt \
    -r requirements-dev.txt

## Proto Compilation ###########################################################
FROM python-install AS gen-protos

ENV PATH=/opt/vllm/bin/:$PATH

RUN microdnf install -y \
    make \
    findutils \
    && microdnf clean all

RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=Makefile,target=Makefile \
    --mount=type=bind,source=proto,target=proto \
    make gen-protos

## Builder #####################################################################
FROM dev AS build

# install build dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
    pip install -r requirements-build.txt

# install compiler cache to speed up compilation leveraging local or remote caching
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all

# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

# Make sure the cuda environment is in the PATH
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Copy the entire directory before building wheel
COPY vllm vllm

# Copy over the generated *.pb2 files
COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb

ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    CMAKE_BUILD_TYPE=Release python3 setup.py bdist_wheel --dist-dir=dist

## Release #####################################################################
# Note from the non-UBI Dockerfile:
# We used the base cuda image because pytorch installs its own cuda libraries.
# However, pynccl depends on cuda libraries, so we had to switch to the runtime image.
# In the future it would be nice to get a container with pytorch and cuda without duplicating cuda.
FROM python-install AS vllm-openai

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin/:$PATH

# Triton needs a CC compiler
RUN microdnf install -y gcc \
    && microdnf clean all

# install vllm wheel first, so that torch etc will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    pip install dist/*.whl --verbose

# vllm requires a specific nccl version built from source distribution
# See https://github.com/NVIDIA/nccl/issues/1234
RUN pip install \
    -v \
    --force-reinstall \
    --no-binary="all" \
    --no-cache-dir \
    "vllm-nccl-cu12==2.18.1.0.4.0" && \
    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /opt/vllm/lib/ && \
    chmod 0755 /opt/vllm/lib/libnccl.so.2.18.1

RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
    # additional dependencies for the TGIS gRPC server
    grpcio-tools==1.63.0 \
    # additional dependencies for openai api_server
    accelerate==0.30.0 \
    # hf_transfer for faster HF hub downloads
    hf_transfer==0.1.6

ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    GRPC_PORT=8033 \
    HOME=/home/vllm \
    VLLM_NCCL_SO_PATH=/opt/vllm/lib/libnccl.so.2.18.1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork

# setup non-root user for OpenShift
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && chmod g+rwx $HOME /usr/src /workspace
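# (uid 2000 with gid 0 and group-writable directories matches OpenShift's
# practice of running containers as an arbitrary non-root UID in the root group.)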

COPY LICENSE /licenses/vllm.md

USER 2000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
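
For reference, a minimal build-and-run sketch (not part of this commit; the image tag, build-arg values, and model path are placeholders). The build must run from the repository root so the COPY and bind-mount sources resolve, and it needs BuildKit for the RUN --mount steps:

DOCKER_BUILDKIT=1 docker build -f Dockerfile.ubi \
    --build-arg max_jobs=8 \
    --build-arg nvcc_threads=4 \
    -t vllm-ubi .

# HF_HUB_OFFLINE=1 is baked into the image, so mount a local model
# directory and point --model at it rather than relying on hub downloads.
docker run --gpus all -p 8000:8000 \
    -v /path/to/model:/model \
    vllm-ubi --model /model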
