@@ -12,7 +12,7 @@ WORKDIR /app
12
12
RUN dnf remove -y --disableplugin=subscription-manager \
13
13
subscription-manager \
14
14
# we install newer version of requests via pip
15
- python3-requests \
15
+ python3.11-requests \
16
16
&& dnf install -y make \
17
17
# to help with debugging
18
18
procps \
@@ -128,9 +128,10 @@ RUN cargo install --path .
128
128
# # Tests base ##################################################################
129
129
FROM base as test-base
130
130
131
- RUN dnf install -y make unzip python39 python3-pip gcc openssl-devel gcc-c++ && \
131
+ RUN dnf install -y make unzip python3.11 python3.11-pip gcc openssl-devel gcc-c++ && \
132
132
dnf clean all && \
133
- ln -s /usr/bin/python3 /usr/local/bin/python && ln -s /usr/bin/pip3 /usr/local/bin/pip
133
+ ln -fs /usr/bin/python3.11 /usr/bin/python3 && \
134
+ ln -s /usr/bin/python3.11 /usr/local/bin/python && ln -s /usr/bin/pip3.11 /usr/local/bin/pip
134
135
135
136
RUN pip install --upgrade pip && pip install pytest && pip install pytest-asyncio
136
137
@@ -141,6 +142,7 @@ ENV CUDA_VISIBLE_DEVICES=""
141
142
FROM test-base as cpu-tests
142
143
ARG PYTORCH_INDEX
143
144
ARG PYTORCH_VERSION
145
+ ARG SITE_PACKAGES=/usr/local/lib/python3.11/site-packages
144
146
145
147
WORKDIR /usr/src
146
148
@@ -157,8 +159,7 @@ RUN cd server && \
157
159
pip install ".[accelerate]" --no-cache-dir
158
160
159
161
# Patch codegen model changes into transformers 4.34
160
- RUN cp server/transformers_patch/modeling_codegen.py \
161
- /usr/local/lib/python3.*/site-packages/transformers/models/codegen/modeling_codegen.py
162
+ RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
162
163
163
164
# Install router
164
165
COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/local/bin/text-generation-router
@@ -177,7 +178,7 @@ ARG PYTORCH_VERSION
177
178
RUN dnf install -y unzip git ninja-build && dnf clean all
178
179
179
180
RUN cd ~ && \
180
- curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.2-0-Linux-x86_64.sh && \
181
+ curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh && \
181
182
chmod +x Miniconda3-*-Linux-x86_64.sh && \
182
183
bash ./Miniconda3-*-Linux-x86_64.sh -bf -p /opt/miniconda
183
184
@@ -187,7 +188,7 @@ RUN rm -r /opt/miniconda/pkgs/conda-content-trust-*/info/test/tests
187
188
ENV PATH=/opt/miniconda/bin:$PATH
188
189
189
190
# Install specific version of torch
190
- RUN pip install ninja==1.11.1 --no-cache-dir
191
+ RUN pip install ninja==1.11.1.1 --no-cache-dir
191
192
RUN pip install torch==$PYTORCH_VERSION+cu118 --index-url "${PYTORCH_INDEX}/cu118" --no-cache-dir
192
193
193
194
@@ -229,6 +230,7 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build /usr/src/flas
229
230
230
231
# # Final Inference Server image ################################################
231
232
FROM cuda-runtime as server-release
233
+ ARG SITE_PACKAGES=/opt/miniconda/lib/python3.11/site-packages
232
234
233
235
# Install C++ compiler (required at runtime when PT2_COMPILE is enabled)
234
236
RUN dnf install -y gcc-c++ && dnf clean all \
@@ -243,21 +245,20 @@ ENV PATH=/opt/miniconda/bin:$PATH
243
245
# These could instead come from explicitly cached images
244
246
245
247
# Copy build artifacts from flash attention builder
246
- COPY --from=flash-att-cache /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/miniconda/lib/python3.9/site-packages
247
- COPY --from=flash-att-cache /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/miniconda/lib/python3.9/site-packages
248
- COPY --from=flash-att-cache /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/miniconda/lib/python3.9/site-packages
248
+ COPY --from=flash-att-cache /usr/src/flash-attention/build/lib.linux-x86_64-cpython-* ${SITE_PACKAGES}
249
+ COPY --from=flash-att-cache /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-* ${SITE_PACKAGES}
250
+ COPY --from=flash-att-cache /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-* ${SITE_PACKAGES}
249
251
250
252
# Copy build artifacts from flash attention v2 builder
251
- COPY --from=flash-att-v2-cache /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-39 /opt/miniconda/lib/python3.9/site-packages
253
+ COPY --from=flash-att-v2-cache /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-* ${SITE_PACKAGES}
252
254
253
255
# Install server
254
256
COPY proto proto
255
257
COPY server server
256
258
RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu]" --no-cache-dir
257
259
258
- # Patch codegen model changes into transformers 4.34
259
- RUN cp server/transformers_patch/modeling_codegen.py \
260
- /opt/miniconda/lib/python3.*/site-packages/transformers/models/codegen/modeling_codegen.py
260
+ # Patch codegen model changes into transformers 4.34.0
261
+ RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
261
262
262
263
# Install router
263
264
COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/local/bin/text-generation-router
@@ -272,7 +273,7 @@ ENV PORT=3000 \
272
273
RUN chmod -R g+rwx ${HOME}
273
274
274
275
# Temporary for dev
275
- RUN chmod -R g+w /opt/miniconda/lib/python3.*/site-packages /text_generation_server /usr/src /usr/local/bin
276
+ RUN chmod -R g+w ${SITE_PACKAGES} /text_generation_server /usr/src /usr/local/bin
276
277
277
278
# Run as non-root user by default
278
279
USER tgis
0 commit comments