
Commit c737a7a

ci/build/feat: bump vLLM libs to v0.4.2 and other deps in Dockerfile.ubi (#23)

Changes:
- vLLM v0.4.2 was published today; update our build to use pre-built libs from their wheel
- bump other dependencies in the image build (base UBI image, miniforge, flash attention, grpcio-tools, accelerate)
- little cleanup to remove `PYTORCH_` args that are no longer used

Signed-off-by: Travis Johnson <[email protected]>
1 parent 2caabff commit c737a7a

File tree

1 file changed (+8, -11 lines)


Dockerfile.ubi

Lines changed: 8 additions & 11 deletions
@@ -2,11 +2,8 @@
 # docs/source/dev/dockerfile-ubi/dockerfile-ubi.rst
 
 ## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.3-1612
+ARG BASE_UBI_IMAGE_TAG=9.4-949.1714662671
 ARG PYTHON_VERSION=3.11
-ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
-# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
-ARG PYTORCH_VERSION=2.1.2
 
 # NOTE: This setting only has an effect when not using prebuilt-wheel kernels
 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
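For context: the NOTE above holds because `TORCH_CUDA_ARCH_LIST` is only consulted when CUDA kernels are compiled from source; with the pre-built wheel path this commit switches to, it is ignored. A minimal sketch of a from-source build where it does apply (the setup.py invocation is generic, not taken from this Dockerfile):

    # PyTorch's torch.utils.cpp_extension reads this variable to decide which
    # GPU architectures to compile kernels for
    export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
    python3 setup.py bdist_wheel --dist-dir=dist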
@@ -30,7 +27,7 @@ RUN microdnf install -y \
 FROM base as python-install
 
 ARG PYTHON_VERSION
-ARG MINIFORGE_VERSION=23.11.0-0
+ARG MINIFORGE_VERSION=24.3.0-0
 
 RUN curl -fsSL -o ~/miniforge3.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-$(uname)-$(uname -m).sh" && \
     chmod +x ~/miniforge3.sh && \
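Since the installer URL is assembled with shell substitutions, it can help to see what it resolves to; on an x86_64 Linux builder with the version pinned here:

    MINIFORGE_VERSION=24.3.0-0
    echo "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-$(uname)-$(uname -m).sh"
    # -> .../download/24.3.0-0/Miniforge3-Linux-x86_64.sh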
@@ -163,8 +160,8 @@ RUN microdnf install -y \
     && microdnf clean all
 
 ARG PYTHON_VERSION
-# 0.4.1 is built for CUDA 12.1 and PyTorch 2.1.2
-ARG VLLM_WHEEL_VERSION=0.4.1
+# 0.4.2 is built for CUDA 12.1 and PyTorch 2.3.0
+ARG VLLM_WHEEL_VERSION=0.4.2
 
 RUN curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v${VLLM_WHEEL_VERSION}/vllm-${VLLM_WHEEL_VERSION}-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl \
     && unzip vllm.whl \
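The `${PYTHON_VERSION//.}` expansions strip the dot from the Python version to form the CPython wheel tag; a quick shell check:

    PYTHON_VERSION=3.11
    echo "vllm-0.4.2-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl"
    # -> vllm-0.4.2-cp311-cp311-manylinux1_x86_64.whl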
@@ -220,7 +217,7 @@ COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoin
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
-    python3 setup.py bdist_wheel --dist-dir=dist
+    VLLM_USE_PRECOMPILED=1 python3 setup.py bdist_wheel --dist-dir=dist
 
 #################### FLASH_ATTENTION Build IMAGE ####################
 FROM dev as flash-attn-builder
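`VLLM_USE_PRECOMPILED=1` tells vLLM's setup.py to skip compiling the CUDA kernels, which is what lets this build reuse the binary libraries extracted from the upstream release wheel (the `unzip vllm.whl` step in the previous hunk). A hedged sketch of the same idea outside Docker; the `vllm/*.so` glob is an assumption about the 0.4.2 wheel layout, not taken from this diff:

    # a wheel is a plain zip archive, so the pre-built extension modules
    # can be lifted out of the published release directly
    curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v0.4.2/vllm-0.4.2-cp311-cp311-manylinux1_x86_64.whl
    unzip -o vllm.whl 'vllm/*.so'
    VLLM_USE_PRECOMPILED=1 python3 setup.py bdist_wheel --dist-dir=dist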
@@ -232,7 +229,7 @@ RUN microdnf install -y git \
 ARG max_jobs=2
 ENV MAX_JOBS=${max_jobs}
 # flash attention version
-ARG flash_attn_version=v2.5.6
+ARG flash_attn_version=v2.5.8
 ENV FLASH_ATTN_VERSION=${flash_attn_version}
 
 WORKDIR /usr/src/flash-attention-v2
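`MAX_JOBS` caps parallel compilation for the flash-attention build, mainly to bound memory use, since each nvcc job can consume several GB. A hedged sketch of the kind of install this stage performs (the exact build commands are not part of this hunk):

    # flash-attn's setup.py honors MAX_JOBS when launching its parallel build
    MAX_JOBS=2 pip3 install --no-build-isolation flash-attn==2.5.8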
@@ -266,9 +263,9 @@ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,ta
 RUN --mount=type=cache,target=/root/.cache/pip \
     pip3 install \
         # additional dependencies for the TGIS gRPC server
-        grpcio-tools==1.62.1 \
+        grpcio-tools==1.63.0 \
         # additional dependencies for openai api_server
-        accelerate==0.28.0 \
+        accelerate==0.30.0 \
         # hf_transfer for faster HF hub downloads
         hf_transfer==0.1.6
 
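grpcio-tools supplies `python -m grpc_tools.protoc`, which is presumably what generates the `vllm/entrypoints/grpc/pb` stubs copied from the gen-protos stage in an earlier hunk. A hedged illustration; the proto directory and file name are assumptions, not shown in this diff:

    python3 -m grpc_tools.protoc -I proto \
        --python_out=vllm/entrypoints/grpc/pb \
        --grpc_python_out=vllm/entrypoints/grpc/pb \
        proto/generation.proto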
