1
1
# # Global Args #################################################################
2
- ARG BASE_UBI_IMAGE_TAG=9.2-696
3
- ARG PROTOC_VERSION=23.4
2
+ ARG BASE_UBI_IMAGE_TAG=9.2-722
3
+ ARG PROTOC_VERSION=24.0
4
4
# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
5
5
ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
6
6
ARG PYTORCH_VERSION=2.0.1
7
- ARG OPTIMUM_VERSION=1.9.1
8
7
9
8
# # Base Layer ##################################################################
10
9
FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} as base
11
10
WORKDIR /app
12
11
13
- RUN dnf install -y --disableplugin=subscription-manager \
14
- make \
15
- # to help with debugging
16
- procps \
17
- && dnf clean all --disableplugin=subscription-manager
12
+ RUN dnf remove -y --disableplugin=subscription-manager \
13
+ subscription-manager \
14
+ # we install newer version of requests via pip
15
+ python3-requests \
16
+ && dnf install -y make \
17
+ # to help with debugging
18
+ procps \
19
+ && dnf clean all
18
20
19
21
ENV LANG=C.UTF-8 \
20
22
LC_ALL=C.UTF-8
@@ -29,14 +31,14 @@ ENV CUDA_VERSION=11.8.0 \
29
31
NV_CUDA_CUDART_VERSION=11.8.89-1 \
30
32
NV_CUDA_COMPAT_VERSION=520.61.05-1
31
33
32
- RUN dnf config-manager --disableplugin=subscription-manager \
34
+ RUN dnf config-manager \
33
35
--add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
34
- && dnf install -y --disableplugin=subscription-manager \
36
+ && dnf install -y \
35
37
cuda-cudart-11-8-${NV_CUDA_CUDART_VERSION} \
36
38
cuda-compat-11-8-${NV_CUDA_COMPAT_VERSION} \
37
39
&& echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \
38
40
&& echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf \
39
- && dnf clean all --disableplugin=subscription-manager
41
+ && dnf clean all
40
42
41
43
ENV CUDA_HOME="/usr/local/cuda" \
42
44
PATH="/usr/local/nvidia/bin:${CUDA_HOME}/bin:${PATH}" \
@@ -50,15 +52,15 @@ ENV NV_NVTX_VERSION=11.8.86-1 \
50
52
NV_LIBCUBLAS_VERSION=11.11.3.6-1 \
51
53
NV_LIBNCCL_PACKAGE_VERSION=2.15.5-1+cuda11.8
52
54
53
- RUN dnf config-manager --disableplugin=subscription-manager \
55
+ RUN dnf config-manager \
54
56
--add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
55
- && dnf install -y --disableplugin=subscription-manager \
57
+ && dnf install -y \
56
58
cuda-libraries-11-8-${NV_CUDA_LIB_VERSION} \
57
59
cuda-nvtx-11-8-${NV_NVTX_VERSION} \
58
60
libnpp-11-8-${NV_LIBNPP_VERSION} \
59
61
libcublas-11-8-${NV_LIBCUBLAS_VERSION} \
60
62
libnccl-${NV_LIBNCCL_PACKAGE_VERSION} \
61
- && dnf clean all --disableplugin=subscription-manager
63
+ && dnf clean all
62
64
63
65
# # CUDA Development ############################################################
64
66
FROM cuda-base as cuda-devel
@@ -69,9 +71,9 @@ ENV NV_CUDA_CUDART_DEV_VERSION=11.8.89-1 \
69
71
NV_LIBNPP_DEV_VERSION=11.8.0.86-1 \
70
72
NV_LIBNCCL_DEV_PACKAGE_VERSION=2.15.5-1+cuda11.8
71
73
72
- RUN dnf config-manager --disableplugin=subscription-manager \
74
+ RUN dnf config-manager \
73
75
--add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
74
- && dnf install -y --disableplugin=subscription-manager make \
76
+ && dnf install -y \
75
77
cuda-command-line-tools-11-8-${NV_CUDA_LIB_VERSION} \
76
78
cuda-libraries-devel-11-8-${NV_CUDA_LIB_VERSION} \
77
79
cuda-minimal-build-11-8-${NV_CUDA_LIB_VERSION} \
@@ -80,7 +82,7 @@ RUN dnf config-manager --disableplugin=subscription-manager \
80
82
libcublas-devel-11-8-${NV_LIBCUBLAS_DEV_VERSION} \
81
83
libnpp-devel-11-8-${NV_LIBNPP_DEV_VERSION} \
82
84
libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
83
- && dnf clean all --disableplugin=subscription-manager
85
+ && dnf clean all
84
86
85
87
ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
86
88
@@ -126,8 +128,8 @@ RUN cargo install --path .
126
128
# # Tests base ##################################################################
127
129
FROM base as test-base
128
130
129
- RUN dnf install -y --disableplugin=subscription-manager make unzip python39 python3-pip gcc openssl-devel gcc-c++ && \
130
- dnf clean all --disableplugin=subscription-manager && \
131
+ RUN dnf install -y make unzip python39 python3-pip gcc openssl-devel gcc-c++ && \
132
+ dnf clean all && \
131
133
ln -s /usr/bin/python3 /usr/local/bin/python && ln -s /usr/bin/pip3 /usr/local/bin/pip
132
134
133
135
RUN pip install --upgrade pip && pip install pytest && pip install pytest-asyncio
@@ -139,16 +141,12 @@ ENV CUDA_VISIBLE_DEVICES=""
139
141
FROM test-base as cpu-tests
140
142
ARG PYTORCH_INDEX
141
143
ARG PYTORCH_VERSION
142
- ARG OPTIMUM_VERSION
143
144
144
145
WORKDIR /usr/src
145
146
146
147
# Install specific version of torch
147
148
RUN pip install torch=="$PYTORCH_VERSION+cpu" --index-url "${PYTORCH_INDEX}/cpu" --no-cache-dir
148
149
149
- # Install optimum - not used in tests for now
150
- # RUN pip install optimum==$OPTIMUM_VERSION --no-cache-dir
151
-
152
150
COPY server/Makefile server/Makefile
153
151
154
152
# Install server
@@ -175,13 +173,8 @@ RUN cd integration_tests && make install
175
173
FROM cuda-devel as python-builder
176
174
ARG PYTORCH_INDEX
177
175
ARG PYTORCH_VERSION
178
- ARG OPTIMUM_VERSION
179
176
180
- RUN dnf install -y --disableplugin=subscription-manager \
181
- unzip \
182
- git \
183
- ninja-build \
184
- && dnf clean all --disableplugin=subscription-manager
177
+ RUN dnf install -y unzip git ninja-build && dnf clean all
185
178
186
179
RUN cd ~ && \
187
180
curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.2-0-Linux-x86_64.sh && \
@@ -222,18 +215,6 @@ FROM python-builder as build
222
215
COPY server/custom_kernels/ /usr/src/.
223
216
RUN cd /usr/src && python setup.py build_ext && python setup.py install
224
217
225
- # Install optimum
226
- RUN pip install optimum[onnxruntime-gpu]==$OPTIMUM_VERSION --no-cache-dir
227
-
228
- # Install onnx
229
- COPY server/Makefile-onnx server/Makefile
230
- RUN cd server && make install-onnx
231
-
232
- # Install onnx runtime
233
- COPY server/Makefile-onnx-runtime server/Makefile
234
- RUN cd server && make install-onnx-runtime
235
-
236
-
237
218
# # Flash attention cached build image ##########################################
238
219
FROM base as flash-att-cache
239
220
COPY --from=flash-att-builder /usr/src/flash-attention/build /usr/src/flash-attention/build
@@ -250,8 +231,7 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build /usr/src/flas
250
231
FROM cuda-runtime as server-release
251
232
252
233
# Install C++ compiler (required at runtime when PT2_COMPILE is enabled)
253
- RUN dnf install -y --disableplugin=subscription-manager gcc-c++ \
254
- && dnf clean all --disableplugin=subscription-manager \
234
+ RUN dnf install -y gcc-c++ && dnf clean all \
255
235
&& useradd -u 2000 tgis -m -g 0
256
236
257
237
SHELL ["/bin/bash" , "-c" ]
@@ -275,7 +255,7 @@ COPY proto proto
275
255
COPY server server
276
256
RUN cd server && \
277
257
make gen-server && \
278
- pip install ".[bnb, accelerate]" --no-cache-dir
258
+ pip install ".[bnb, accelerate, onnx-gpu ]" --no-cache-dir
279
259
280
260
# Patch codegen model changes into transformers 4.31
281
261
RUN cp server/transformers_patch/modeling_codegen.py \
0 commit comments