
Commit ca06561

Dockerfile.ubi: improvements
- get rid of --link flags for COPY operations: not supported by openshift CI (buildah), see containers/buildah#4325 (illustrated below)
- get rid of conda
- install ccache to leverage caching
1 parent 8a6c9c9 commit ca06561
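For context on the first bullet: `COPY --link` is a BuildKit-only Dockerfile flag, and buildah (which backs the OpenShift CI builds) does not accept it, per containers/buildah#4325. Dropping the flag leaves the copied files in the image unchanged; only BuildKit's layer-relinking optimization is lost. A minimal before/after sketch, with illustrative stage and path names that are not taken from this Dockerfile:

```dockerfile
# Before: BuildKit-only syntax, rejected by buildah (containers/buildah#4325)
#   COPY --link --from=build /workspace/dist /workspace/dist

# After: a plain COPY that both BuildKit and buildah accept
COPY --from=build /workspace/dist /workspace/dist
```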

File tree: 1 file changed

Dockerfile.ubi: 39 additions & 48 deletions
@@ -14,6 +14,11 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 
 ## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
+ARG PYTHON_VERSION
+
+RUN microdnf install -y \
+    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
+    && microdnf clean all
 
 WORKDIR /workspace
 
@@ -30,20 +35,16 @@ RUN microdnf install -y \
 FROM base as python-install
 
 ARG PYTHON_VERSION
-ARG MINIFORGE_VERSION=23.11.0-0
-
-RUN curl -fsSL -o ~/miniforge3.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-$(uname)-$(uname -m).sh" && \
-    chmod +x ~/miniforge3.sh && \
-    bash ~/miniforge3.sh -b -p /opt/conda && \
-    source "/opt/conda/etc/profile.d/conda.sh" && \
-    conda create -y -p /opt/vllm python=${PYTHON_VERSION} && \
-    conda activate /opt/vllm && \
-    rm ~/miniforge3.sh
-# use of the /opt/vllm env requires:
-# ENV PATH=/opt/vllm/bin/:$PATH
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+RUN microdnf install -y \
+    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
+    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all
+
 
 ## CUDA Base ###################################################################
-FROM base as cuda-base
+FROM python-install as cuda-base
 
 # The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
 # this env var is set to 12.2.0, even though it's compatible
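The hunk above replaces the Miniforge/conda bootstrap with a plain venv created by the system Python: "activation" is nothing more than putting the venv's bin directory first on PATH, which the VIRTUAL_ENV/PATH pair does up front, and later stages simply re-declare the same two variables. A minimal sketch of the equivalent, with PYTHON_VERSION pinned to 3.11 purely as an example value:

```dockerfile
# Equivalent of "conda create -p /opt/vllm python=3.11" followed by
# "conda activate /opt/vllm": activation is just a PATH prepend, so pip and
# python below already resolve to /opt/vllm/bin.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN python3.11 -m venv "$VIRTUAL_ENV" && pip install --no-cache -U pip wheel
```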
@@ -63,26 +64,11 @@ RUN microdnf install -y \
     cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
     && microdnf clean all
 
-ENV CUDA_HOME="/usr/local/cuda" \
-    PATH="/usr/local/nvidia/bin:${CUDA_HOME}/bin:${PATH}" \
-    LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
 
-
-## CUDA Runtime ################################################################
-FROM cuda-base as cuda-runtime
-
-ENV NV_NVTX_VERSION=12.2.53-1 \
-    NV_LIBNPP_VERSION=12.1.1.14-1 \
-    NV_LIBCUBLAS_VERSION=12.2.1.16-1 \
-    NV_LIBNCCL_PACKAGE_VERSION=2.18.5-1+cuda12.2
-
-RUN microdnf install -y \
-    cuda-libraries-12-2-${NV_CUDA_LIB_VERSION} \
-    cuda-nvtx-12-2-${NV_NVTX_VERSION} \
-    libnpp-12-2-${NV_LIBNPP_VERSION} \
-    libcublas-12-2-${NV_LIBCUBLAS_VERSION} \
-    libnccl-${NV_LIBNCCL_PACKAGE_VERSION} \
-    && microdnf clean all
+ARG CUDA_HOME="/usr/local/cuda"
+ENV CUDA_HOME=${CUDA_HOME}\
+    PATH="${CUDA_HOME}/bin:${PATH}" \
+    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
 
 
 ## CUDA Development ############################################################
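Promoting CUDA_HOME to a build argument with a default keeps the standard /usr/local/cuda layout while allowing it to be overridden at build time, and PATH/LD_LIBRARY_PATH then follow whatever value is passed in. A small sketch of the pattern; the override value shown is only an example:

```dockerfile
# Default suits the standard CUDA install location; override when needed, e.g.
#   docker build --build-arg CUDA_HOME=/usr/local/cuda-12.2 -f Dockerfile.ubi .
ARG CUDA_HOME="/usr/local/cuda"
ENV CUDA_HOME=${CUDA_HOME} \
    PATH="${CUDA_HOME}/bin:${PATH}" \
    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
```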
@@ -114,16 +100,16 @@ ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
 RUN ldconfig /usr/local/cuda-12.2/compat/
 
 ## Python cuda base #################################################################
-FROM cuda-devel as python-cuda-base
+FROM cuda-devel AS python-cuda-base
 
-COPY --from=python-install /opt/vllm /opt/vllm
-ENV PATH=/opt/vllm/bin/:$PATH
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 # install cuda and common dependencies
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
     --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
-    pip3 install \
+    pip install \
     -r requirements-cuda.txt
 
 ## Development #################################################################
@@ -179,6 +165,10 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
     pip install -r requirements-build.txt
 
+# install compiler cache to speed up compilation leveraging local or remote caching
+RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all
+# install build dependencies
+
 # copy input files
 COPY csrc csrc
 COPY setup.py setup.py
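ccache is not in the stock UBI 9 repositories, hence the EPEL release RPM above. The cache only pays off when compiled objects land in a directory that survives between builds; further down, the build stage arranges that with CCACHE_DIR plus a cache mount, so compiler invocations during the wheel build can be reused. A condensed sketch of that pattern; the compile command and statistics call are illustrative:

```dockerfile
# Keep compiled objects in a cache mount so that rebuilding the image after a
# small source change reuses previous compilations instead of starting cold.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    python setup.py bdist_wheel --dist-dir=dist \
    && ccache -s  # print hit/miss statistics for this build
```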
@@ -187,7 +177,6 @@ COPY CMakeLists.txt CMakeLists.txt
 COPY requirements-common.txt requirements-common.txt
 COPY requirements-cuda.txt requirements-cuda.txt
 COPY pyproject.toml pyproject.toml
-COPY vllm/__init__.py vllm/__init__.py
 
 ARG TORCH_CUDA_ARCH_LIST
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
@@ -201,7 +190,7 @@ ENV NVCC_THREADS=$nvcc_threads
 # make sure punica kernels are built (for LoRA)
 ENV VLLM_INSTALL_PUNICA_KERNELS=1
 
-# Setup path stuff? Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8
+# Make sure the cuda environment is in the PATH
 ENV PATH=/usr/local/cuda/bin:$PATH
 ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 
@@ -220,10 +209,12 @@ COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
-    python3 setup.py bdist_wheel --dist-dir=dist
+    python setup.py bdist_wheel --dist-dir=dist
 
 #################### FLASH_ATTENTION Build IMAGE ####################
 FROM dev as flash-attn-builder
+ENV VIRTUAL_ENV=/opt/vllm/bin
+ENV PATH=${VIRTUAL_ENV}/bin:$PATH
 
 RUN microdnf install -y git \
     && microdnf clean all
@@ -246,13 +237,16 @@ RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
 # We used base cuda image because pytorch installs its own cuda libraries.
 # However pynccl depends on cuda libraries so we had to switch to the runtime image
 # In the future it would be nice to get a container with pytorch and cuda without duplicating cuda
-FROM cuda-runtime AS vllm-openai
+FROM python-install AS vllm-openai
 
 WORKDIR /workspace
 
-# Create release python environment
-COPY --from=python-cuda-base /opt/vllm /opt/vllm
-ENV PATH=/opt/vllm/bin/:$PATH
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH=$VIRTUAL_ENV/bin/:$PATH
+
+# Triton needs a CC compiler
+RUN microdnf install -y gcc \
+    && microdnf clean all
 
 # install vllm wheel first, so that torch etc will be installed
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
@@ -264,22 +258,19 @@ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,ta
     pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install \
+    pip install \
     # additional dependencies for the TGIS gRPC server
-    grpcio-tools==1.62.1 \
+    grpcio==1.62.1 \
     # additional dependencies for openai api_server
     accelerate==0.28.0 \
     # hf_transfer for faster HF hub downloads
     hf_transfer==0.1.6
 
-# Triton needs a CC compiler
-RUN microdnf install -y gcc \
-    && microdnf clean all
-
 ENV HF_HUB_OFFLINE=1 \
     PORT=8000 \
     GRPC_PORT=8033 \
     HOME=/home/vllm \
+    VLLM_NCCL_SO_PATH=/opt/vllm/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2 \
     VLLM_USAGE_SOURCE=production-docker-image
 
 # setup non-root user for OpenShift
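With conda gone, the NCCL shared library now lives inside the /opt/vllm venv (the path layout matches the nvidia-nccl pip package that comes in with torch), and VLLM_NCCL_SO_PATH points vLLM at that copy; the python3.11 segment has to match the PYTHON_VERSION the venv was created with. An optional sanity check one could append to the final stage; it is not part of this commit:

```dockerfile
# Fail the image build early if libnccl is not where VLLM_NCCL_SO_PATH says it
# is, e.g. after bumping PYTHON_VERSION without updating this path.
RUN test -f "$VLLM_NCCL_SO_PATH" \
    || { echo "libnccl.so.2 not found at $VLLM_NCCL_SO_PATH" >&2; exit 1; }
```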
