@@ -14,6 +14,11 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
+ ARG PYTHON_VERSION
+
+ RUN microdnf install -y \
+     python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
+     && microdnf clean all

WORKDIR /workspace

@@ -30,20 +35,16 @@ RUN microdnf install -y \
FROM base as python-install

ARG PYTHON_VERSION
- ARG MINIFORGE_VERSION=23.11.0-0
-
- RUN curl -fsSL -o ~/miniforge3.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-$(uname)-$(uname -m).sh" && \
-     chmod +x ~/miniforge3.sh && \
-     bash ~/miniforge3.sh -b -p /opt/conda && \
-     source "/opt/conda/etc/profile.d/conda.sh" && \
-     conda create -y -p /opt/vllm python=${PYTHON_VERSION} && \
-     conda activate /opt/vllm && \
-     rm ~/miniforge3.sh
- # use of the /opt/vllm env requires:
- # ENV PATH=/opt/vllm/bin/:$PATH
+
+ ENV VIRTUAL_ENV=/opt/vllm
+ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ RUN microdnf install -y \
+     python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
+     python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all
+
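# A quick smoke test for this stage (illustrative sketch; assumes PYTHON_VERSION=3.11):
#   docker build --target python-install --build-arg PYTHON_VERSION=3.11 -t vllm-py .
#   docker run --rm vllm-py sh -c 'command -v python && python --version'
#   # expected: /opt/vllm/bin/python, since the venv's bin dir precedes the system PATH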

## CUDA Base ###################################################################
- FROM base as cuda-base
+ FROM python-install as cuda-base

# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
# this env var is set to 12.2.0, even though it's compatible
@@ -63,26 +64,11 @@ RUN microdnf install -y \
    cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
    && microdnf clean all

- ENV CUDA_HOME="/usr/local/cuda" \
-     PATH="/usr/local/nvidia/bin:${CUDA_HOME}/bin:${PATH}" \
-     LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"

-
- ## CUDA Runtime ################################################################
- FROM cuda-base as cuda-runtime
-
- ENV NV_NVTX_VERSION=12.2.53-1 \
-     NV_LIBNPP_VERSION=12.1.1.14-1 \
-     NV_LIBCUBLAS_VERSION=12.2.1.16-1 \
-     NV_LIBNCCL_PACKAGE_VERSION=2.18.5-1+cuda12.2
-
- RUN microdnf install -y \
-     cuda-libraries-12-2-${NV_CUDA_LIB_VERSION} \
-     cuda-nvtx-12-2-${NV_NVTX_VERSION} \
-     libnpp-12-2-${NV_LIBNPP_VERSION} \
-     libcublas-12-2-${NV_LIBCUBLAS_VERSION} \
-     libnccl-${NV_LIBNCCL_PACKAGE_VERSION} \
-     && microdnf clean all
+ ARG CUDA_HOME="/usr/local/cuda"
+ ENV CUDA_HOME=${CUDA_HOME} \
+     PATH="${CUDA_HOME}/bin:${PATH}" \
+     LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
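# Illustrative check, assuming a shell inside an image built from this stage:
#   echo "$CUDA_HOME" "$LD_LIBRARY_PATH"
#   ls /usr/local/cuda-12.2/compat/   # cuda-compat-12-2 provides libcuda.so.* for older host drivers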


## CUDA Development ############################################################
@@ -114,16 +100,16 @@ ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
RUN ldconfig /usr/local/cuda-12.2/compat/

## Python cuda base #################################################################
- FROM cuda-devel as python-cuda-base
+ FROM cuda-devel AS python-cuda-base

- COPY --from=python-install /opt/vllm /opt/vllm
- ENV PATH=/opt/vllm/bin/:$PATH
+ ENV VIRTUAL_ENV=/opt/vllm
+ ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# install cuda and common dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
-     pip3 install \
+     pip install \
        -r requirements-cuda.txt
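# Hedged aside: the cache mounts above are BuildKit features, so the build must run with
# DOCKER_BUILDKIT=1 (or via buildx); repeated builds then reuse /root/.cache/pip without
# baking the cache into the image layer:
#   DOCKER_BUILDKIT=1 docker build --target python-cuda-base .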

## Development #################################################################
@@ -179,6 +165,10 @@ RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
    pip install -r requirements-build.txt

+ # install ccache to speed up compilation, leveraging a local or remote compiler cache
+ RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all
+ # install build dependencies
+
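# Hedged note: with ccache on PATH, compiler invocations during the extension build can be
# cached across runs; hit rates can be inspected after a build with:
#   ccache -s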

# copy input files
COPY csrc csrc
COPY setup.py setup.py
@@ -187,7 +177,6 @@ COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml
- COPY vllm/__init__.py vllm/__init__.py

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
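# Illustrative override (assumption: targeting only A100/H100 to shorten compile time):
#   docker build --build-arg TORCH_CUDA_ARCH_LIST="8.0 9.0+PTX" .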
@@ -201,7 +190,7 @@ ENV NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

- # Setup path stuff? Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8
+ # Make sure the CUDA toolchain is on the PATH
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

@@ -220,10 +209,12 @@ COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
-     python3 setup.py bdist_wheel --dist-dir=dist
+     python setup.py bdist_wheel --dist-dir=dist

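# Hedged sketch: the wheel lands in /workspace/dist and can be inspected from this stage:
#   docker build --target build -t vllm-build . && docker run --rm vllm-build ls dist/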
#################### FLASH_ATTENTION Build IMAGE ####################
FROM dev as flash-attn-builder
+ ENV VIRTUAL_ENV=/opt/vllm
+ ENV PATH=${VIRTUAL_ENV}/bin:$PATH

RUN microdnf install -y git \
    && microdnf clean all
@@ -246,13 +237,16 @@ RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
# We used the base CUDA image because PyTorch installs its own CUDA libraries.
# However, pynccl depends on the CUDA libraries, so we had to switch to the runtime image.
# In the future it would be nice to get a container with PyTorch and CUDA without duplicating CUDA.
- FROM cuda-runtime AS vllm-openai
+ FROM python-install AS vllm-openai

WORKDIR /workspace

- # Create release python environment
- COPY --from=python-cuda-base /opt/vllm /opt/vllm
- ENV PATH=/opt/vllm/bin/:$PATH
+ ENV VIRTUAL_ENV=/opt/vllm
+ ENV PATH=$VIRTUAL_ENV/bin:$PATH
+
+ # Triton needs a CC compiler
+ RUN microdnf install -y gcc \
+     && microdnf clean all
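# Hedged rationale: Triton JIT-compiles kernels at runtime and shells out to a host C
# compiler, so a runtime image without gcc fails on first kernel compilation; sanity check:
#   docker run --rm <image> gcc --version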

# install vllm wheel first, so that torch etc. will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
@@ -264,22 +258,19 @@ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,ta
    pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir

RUN --mount=type=cache,target=/root/.cache/pip \
-     pip3 install \
+     pip install \
        # additional dependencies for the TGIS gRPC server
-         grpcio-tools==1.62.1 \
+         grpcio==1.62.1 \
        # additional dependencies for openai api_server
        accelerate==0.28.0 \
        # hf_transfer for faster HF hub downloads
        hf_transfer==0.1.6
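# Hedged usage sketch once the image is built (module path as used upstream in vLLM):
#   python -m vllm.entrypoints.openai.api_server --model <model-id> --port 8000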

- # Triton needs a CC compiler
- RUN microdnf install -y gcc \
-     && microdnf clean all
-
ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    GRPC_PORT=8033 \
    HOME=/home/vllm \
+     VLLM_NCCL_SO_PATH=/opt/vllm/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2 \
    VLLM_USAGE_SOURCE=production-docker-image
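# Hedged caveat: VLLM_NCCL_SO_PATH hard-codes python3.11; if PYTHON_VERSION changes,
# the library can be located instead with:
#   find /opt/vllm -name 'libnccl.so.2'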

# setup non-root user for OpenShift