@@ -4,7 +4,6 @@ ARG PYTHON_VERSION=3.11
 
 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 
-
 ## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
 ARG PYTHON_VERSION
@@ -39,61 +38,19 @@ RUN microdnf install -y \
 ## CUDA Base ###################################################################
 FROM python-install as cuda-base
 
-# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
-# this env var is set to 12.2.0, even though it's compatible
-#ENV CUDA_VERSION=12.2.0 \
-ENV CUDA_VERSION=12.0.0 \
-    NV_CUDA_LIB_VERSION=12.2.0-1 \
-    NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-    NV_CUDA_CUDART_VERSION=12.2.53-1 \
-    NV_CUDA_COMPAT_VERSION=535.104.12
-
 RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
     https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
 
 RUN microdnf install -y \
-    cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \
-    cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
-    && microdnf clean all
+    cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
+    microdnf clean all
 
-
-ARG CUDA_HOME="/usr/local/cuda"
-ENV CUDA_HOME=${CUDA_HOME}\
+ENV CUDA_HOME="/usr/local/cuda" \
     PATH="${CUDA_HOME}/bin:${PATH}" \
     LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
 
-
-## CUDA Development ############################################################
-FROM cuda-base as cuda-devel
-
-ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \
-    NV_NVML_DEV_VERSION=12.2.81-1 \
-    NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \
-    NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \
-    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2
-
-RUN microdnf install -y \
-    cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \
-    cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \
-    cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \
-    cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \
-    cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \
-    libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \
-    libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \
-    libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
-    && microdnf clean all
-
-ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
-
-# Workaround for https://github.com/openai/triton/issues/2507 and
-# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
-# this won't be needed for future versions of this docker image
-# or future versions of triton.
-RUN ldconfig /usr/local/cuda-12.2/compat/
-
 ## Python cuda base #################################################################
-FROM cuda-devel AS python-cuda-base
+FROM cuda-base AS python-cuda-base
 
 ENV VIRTUAL_ENV=/opt/vllm
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
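
Note: the consolidated cuda-base stage installs the CUDA 12.4 compiler and development libraries directly, replacing the pinned 12.2 runtime packages and the separate cuda-devel stage. A quick smoke test for the toolchain wiring, built as a sketch; the `vllm-cuda:dev` tag is hypothetical and podman works the same way:

```sh
# Build just the CUDA python stage, then confirm nvcc is on PATH
# and CUDA_HOME points at the toolkit install.
docker build --target python-cuda-base -t vllm-cuda:dev .
docker run --rm vllm-cuda:dev sh -c 'nvcc --version && echo "$CUDA_HOME"'
```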
@@ -128,7 +85,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     pip install -r requirements-build.txt
 
 # install compiler cache to speed up compilation leveraging local or remote caching
-RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all
+# git is required for the cutlass kernels
+RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
 # install build dependencies
 
 # copy input files
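
This layer now also pulls in git, which the build needs to fetch the CUTLASS kernel sources, alongside ccache from EPEL. A hedged way to verify the layer did what the comment claims (package names as used above; the check itself is illustrative):

```sh
# Inside the built stage: both tools should resolve via rpm and run.
rpm -q git ccache
git --version && ccache --version
```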
@@ -162,13 +120,12 @@ COPY vllm vllm
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
-    CMAKE_BUILD_TYPE=Release python3 setup.py bdist_wheel --dist-dir=dist
+    env CFLAGS="-march=haswell" \
+        CXXFLAGS="$CFLAGS $CXXFLAGS" \
+        CMAKE_BUILD_TYPE=Release \
+        python3 setup.py bdist_wheel --dist-dir=dist
 
 ## Release #####################################################################
-# Note from the non-UBI Dockerfile:
-# We used base cuda image because pytorch installs its own cuda libraries.
-# However pynccl depends on cuda libraries so we had to switch to the runtime image
-# In the future it would be nice to get a container with pytorch and cuda without duplicating cuda
 FROM python-install AS vllm-openai
 
 WORKDIR /workspace
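
The wheel build now pins the host C/C++ baseline to Haswell rather than whatever the build machine supports, so the compiled extensions stay portable across x86-64 hosts instead of risking illegal-instruction crashes on older CPUs. A way to see what that baseline implies (standard gcc behavior; the grep targets are illustrative):

```sh
# List the ISA feature macros gcc defines at -march=haswell:
# AVX2 and FMA are enabled, AVX-512 is not, so no AVX-512 code is emitted.
echo | gcc -march=haswell -dM -E - | grep -E '__AVX2__|__FMA__|__AVX512F__'
```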