@@ -229,58 +229,6 @@ WORKDIR /usr/src/flash-attention-v2
229
229
RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
230
230
--no-build-isolation --no-deps --no-cache-dir
231
231
232
-
233
- ## Test ########################################################################
234
- FROM dev AS test
235
-
236
- WORKDIR /vllm-workspace
237
- # ADD is used to preserve directory structure
238
- # NB: Could leak secrets from local context; the test image should not be pushed
239
- # to a registry
240
- ADD . /vllm-workspace/
241
- # copy pytorch extensions separately to avoid having to rebuild
242
- # when python code changes
243
- COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
244
- # Install flash attention (from pre-built wheel)
245
- RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
246
- pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
247
- # ignore build dependencies installation because we are using pre-compiled extensions
248
- RUN rm pyproject.toml
249
- RUN --mount=type=cache,target=/root/.cache/pip \
250
- VLLM_USE_PRECOMPILED=1 pip install . --verbose
251
-
252
-
253
- ## Proto Compilation ###########################################################
254
- FROM python-base AS gen-protos
255
-
256
- RUN microdnf install -y \
257
- make \
258
- findutils \
259
- && microdnf clean all
260
-
261
- RUN --mount=type=cache,target=/root/.cache/pip \
262
- --mount=type=bind,source=Makefile,target=Makefile \
263
- --mount=type=bind,source=proto,target=proto \
264
- make gen-protos
265
-
266
- ## vLLM Library Files ##########################################################
267
- # Little extra stage to gather files and manage permissions on them without any
268
- # duplication in the release layer due to permission changes
269
- FROM base AS vllm
270
-
271
- WORKDIR /vllm-staging
272
- # COPY files from various places into a staging directory
273
- COPY vllm vllm
274
- COPY --from=build /workspace/vllm/*.so vllm/
275
- COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb
276
-
277
- # custom COPY command to use umask to control permissions and grant permissions
278
- # to the group
279
- RUN umask 002 \
280
- && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
281
- # not strictly needed, but .so files typically have executable bits
282
- && chmod +x /workspace/vllm/*.so
283
-
284
232
## Release #####################################################################
285
233
# Note from the non-UBI Dockerfile:
286
234
# We used base cuda image because pytorch installs its own cuda libraries.
0 commit comments