 # docs/source/dev/dockerfile-ubi/dockerfile-ubi.rst

 ## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.3-1612
+ARG BASE_UBI_IMAGE_TAG=9.4-949.1714662671
 ARG PYTHON_VERSION=3.11
-ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
-# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
-ARG PYTORCH_VERSION=2.1.2

 # NOTE: This setting only has an effect when not using prebuilt-wheel kernels
 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
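Because TORCH_CUDA_ARCH_LIST is an ordinary build arg, it can be narrowed at build time to cut compile time when the kernels are built from source rather than taken from the prebuilt wheel. A minimal sketch of such an override; the file name Dockerfile.ubi and the tag vllm-ubi are assumptions for illustration, not taken from this diff:

    # Build kernels only for SM 8.0 (e.g. A100); file and tag names are illustrative
    docker build -f Dockerfile.ubi \
        --build-arg TORCH_CUDA_ARCH_LIST="8.0+PTX" \
        -t vllm-ubi .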
@@ -149,8 +146,8 @@ RUN microdnf install -y \
 && microdnf clean all

 ARG PYTHON_VERSION
-# 0.4.1 is built for CUDA 12.1 and PyTorch 2.1.2
-ARG VLLM_WHEEL_VERSION=0.4.1
+# 0.4.2 is built for CUDA 12.1 and PyTorch 2.3.0
+ARG VLLM_WHEEL_VERSION=0.4.2

 RUN curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v${VLLM_WHEEL_VERSION}/vllm-${VLLM_WHEEL_VERSION}-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl \
 && unzip vllm.whl \
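The ${PYTHON_VERSION//.} in the wheel URL is shell parameter expansion that deletes the dot from the Python version, producing the CPython ABI tag of the wheel. With the defaults above (PYTHON_VERSION=3.11, VLLM_WHEEL_VERSION=0.4.2) the curl resolves to:

    # 3.11 -> 311, so the download is the cp311 wheel
    https://github.com/vllm-project/vllm/releases/download/v0.4.2/vllm-0.4.2-cp311-cp311-manylinux1_x86_64.whl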
@@ -223,7 +220,7 @@ RUN microdnf install -y git \
 ARG max_jobs=2
 ENV MAX_JOBS=${max_jobs}
 # flash attention version
-ARG flash_attn_version=v2.5.6
+ARG flash_attn_version=v2.5.8
 ENV FLASH_ATTN_VERSION=${flash_attn_version}

 WORKDIR /usr/src/flash-attention-v2
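The ARG max_jobs / ENV MAX_JOBS=${max_jobs} pair promotes a build-time arg into an environment variable that later RUN steps can read (the flash-attention compile typically honors MAX_JOBS to limit parallel jobs). Both knobs can be overridden per build; a sketch, reusing the same assumed file and tag names as above:

    # Raise parallel compile jobs and pin the flash-attention tag;
    # everything except the build-arg names themselves is illustrative
    docker build -f Dockerfile.ubi \
        --build-arg max_jobs=8 \
        --build-arg flash_attn_version=v2.5.8 \
        -t vllm-ubi .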
@@ -260,9 +257,9 @@ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,ta
 RUN --mount=type=cache,target=/root/.cache/pip \
     pip install \
         # additional dependencies for the TGIS gRPC server
-        grpcio==1.62.1 \
+        grpcio-tools==1.63.0 \
         # additional dependencies for openai api_server
-        accelerate==0.28.0 \
+        accelerate==0.30.0 \
         # hf_transfer for faster HF hub downloads
         hf_transfer==0.1.6
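Note that the Dockerfile parser strips whole-line # comments before it joins \ continuations, so interleaving comments inside the RUN is legal and the step collapses to a single pip invocation:

    # Effective command after comment stripping and line joining
    pip install grpcio-tools==1.63.0 accelerate==0.30.0 hf_transfer==0.1.6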