5
5
ARG BASE_UBI_IMAGE_TAG=9.4
6
6
ARG PYTHON_VERSION=3.11
7
7
8
- # NOTE: This setting only has an effect when not using prebuilt-wheel kernels
9
8
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
10
9
11
10
@@ -136,24 +135,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
136
135
--mount=type=bind,source=proto,target=proto \
137
136
make gen-protos
138
137
139
- ## Extension Cache #############################################################
140
- # Instead of compiling artifacts every build just copy from pre-built wheel
141
- # This might not work if the PyTorch and CUDA versions don't match!
142
- FROM base as prebuilt-wheel
143
-
144
- RUN microdnf install -y \
145
- unzip \
146
- && microdnf clean all
147
-
148
- ARG PYTHON_VERSION
149
- # 0.4.2 is built for CUDA 12.1 and PyTorch 2.3.0
150
- ARG VLLM_WHEEL_VERSION=0.4.2
151
-
152
- RUN curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v${VLLM_WHEEL_VERSION}/vllm-${VLLM_WHEEL_VERSION}-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl \
153
- && unzip vllm.whl \
154
- && rm vllm.whl
155
- # compiled extensions located at /workspace/vllm/*.so
156
-
157
138
## Builder #####################################################################
158
139
FROM dev AS build
159
140
@@ -194,10 +175,6 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
194
175
# Copy the entire directory before building wheel
195
176
COPY vllm vllm
196
177
197
- # Comment if building *.so files from scratch
198
- ##################################################
199
- # Copy the prebuilt *.so files
200
- COPY --from=prebuilt-wheel /workspace/vllm/*.so /workspace/vllm/
201
178
##################################################
202
179
203
180
# Copy over the generated *.pb2 files
@@ -206,7 +183,7 @@ COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc
206
183
ENV CCACHE_DIR=/root/.cache/ccache
207
184
RUN --mount=type=cache,target=/root/.cache/ccache \
208
185
--mount=type=cache,target=/root/.cache/pip \
209
- VLLM_USE_PRECOMPILED=1 python3 setup.py bdist_wheel --dist-dir=dist
186
+ CMAKE_BUILD_TYPE=Release python3 setup.py bdist_wheel --dist-dir=dist
210
187
211
188
#################### FLASH_ATTENTION Build IMAGE ####################
212
189
FROM dev as flash-attn-builder
0 commit comments