@@ -229,6 +229,58 @@ WORKDIR /usr/src/flash-attention-v2
229
229
RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
230
230
--no-build-isolation --no-deps --no-cache-dir
231
231
232
+
233
## Test ########################################################################
# Test image built on the dev stage: adds the build context, the compiled
# extension modules from the build stage and the pre-built flash-attention
# wheel, then pip-installs the workspace.
FROM dev AS test

WORKDIR /vllm-workspace
# Copy the whole build context; COPY keeps the directory structure and, unlike
# ADD, never unpacks archives or fetches URLs.
# NB: Could leak secrets from local context, the test image should not be pushed
# to a registry
COPY . /vllm-workspace/
# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
# Install flash attention (from pre-built wheel); the wheel directory is
# bind-mounted from the flash-attn-builder stage, so it is not stored in a layer
RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
    pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
# ignore build dependencies installation because we are using pre-compiled extensions
RUN rm pyproject.toml
# pip's download cache lives in a cache mount, so it is reused across builds
# without being written to the image
RUN --mount=type=cache,target=/root/.cache/pip \
    VLLM_USE_PRECOMPILED=1 pip install . --verbose
251
+
252
+
253
## Proto Compilation ###########################################################
# Stage that runs the `gen-protos` Makefile target on top of python-base.
FROM python-base AS gen-protos

# Install the tools used for the Makefile run and drop the package manager
# cache in the same layer.
RUN microdnf install -y findutils make \
    && microdnf clean all

# The Makefile and the proto/ directory are bind-mounted from the build context
# for this step only; pip's cache is kept in a cache mount.
RUN --mount=type=bind,source=proto,target=proto \
    --mount=type=bind,source=Makefile,target=Makefile \
    --mount=type=cache,target=/root/.cache/pip \
    make gen-protos
265
+
266
## vLLM Library Files ##########################################################
# Staging stage: collects the library files in one place and fixes their
# permissions here, so the release layer does not carry a second copy of the
# files caused by a permission change.
FROM base AS vllm

WORKDIR /vllm-staging
# Gather the python sources from the build context, the compiled extensions
# from the build stage and the generated protobuf code from gen-protos.
COPY vllm vllm
COPY --from=build /workspace/vllm/*.so vllm/
COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb

# Copy with `cp` under umask 002 instead of a plain COPY: the staged
# ownership/mode is not preserved and the new files get group permissions.
# The chmod is not strictly needed, but .so files typically have executable bits.
RUN umask 002 \
    && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
    && chmod +x /workspace/vllm/*.so
283
+
232
284
## Release #####################################################################
233
285
# Note from the non-UBI Dockerfile:
234
286
# We used base cuda image because pytorch installs its own cuda libraries.
0 commit comments