Skip to content

Commit 2b1b3df

Browse files
Update Dockerfile to use gcc-toolset-14 and fix test case failures on power (ppc64le) (#28957)
Signed-off-by: Bhagyashri <[email protected]>
1 parent cca2d2c commit 2b1b3df

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,22 @@ function cpu_tests() {
2525

2626
# offline inference
2727
podman exec -it "$container_id" bash -c "
28+
export TORCH_COMPILE_DISABLE=1
2829
set -xve
2930
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> $HOME/test_basic.log
3031

3132
# Run basic model test
3233
podman exec -it "$container_id" bash -c "
34+
export TORCH_COMPILE_DISABLE=1
3335
set -evx
3436
pip install pytest pytest-asyncio einops peft Pillow soundfile transformers_stream_generator matplotlib
35-
pip install sentence-transformers datamodel_code_generator
37+
pip install sentence-transformers datamodel_code_generator tblib
3638
3739
# Note: Bart is disabled until it supports V1
3840
# pytest -v -s tests/models/language/generation/test_bart.py -m cpu_model
39-
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-openai-community/gpt2]
40-
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-facebook/opt-125m]
41-
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-google/gemma-1.1-2b-it]
41+
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-openai-community/gpt2]
42+
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-facebook/opt-125m]
43+
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-google/gemma-1.1-2b-it]
4244
pytest -v -s tests/models/language/pooling/test_classification.py::test_models[float-jason9693/Qwen2.5-1.5B-apeach]
4345
# TODO: The test case tests/models/language/pooling/test_embedding.py::test_models[True-ssmits/Qwen2-7B-Instruct-embed-base] fails on ppc64le, so it is disabled for the time being.
4446
# pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> $HOME/test_rest.log

docker/Dockerfile.ppc64le

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS openbl
88

99
ARG MAX_JOBS
1010
ARG OPENBLAS_VERSION=0.3.30
11-
RUN microdnf install -y dnf && dnf install -y gcc-toolset-13 make wget unzip \
12-
&& source /opt/rh/gcc-toolset-13/enable \
11+
RUN microdnf install -y dnf && dnf install -y gcc-toolset-14 make wget unzip \
12+
&& source /opt/rh/gcc-toolset-14/enable \
1313
&& wget https://github.com/OpenMathLib/OpenBLAS/releases/download/v$OPENBLAS_VERSION/OpenBLAS-$OPENBLAS_VERSION.zip \
1414
&& unzip OpenBLAS-$OPENBLAS_VERSION.zip \
1515
&& cd OpenBLAS-$OPENBLAS_VERSION \
@@ -57,7 +57,7 @@ COPY --from=openblas-builder /tmp/control /dev/null
5757
RUN --mount=type=bind,from=openblas-builder,source=/OpenBLAS-$OPENBLAS_VERSION/,target=/openblas/,rw \
5858
dnf install -y openssl-devel \
5959
&& dnf install -y \
60-
git tar gcc-toolset-13 automake libtool \
60+
git tar gcc-toolset-14 automake libtool \
6161
pkgconfig xsimd zeromq-devel kmod findutils protobuf* \
6262
libtiff-devel libjpeg-devel zlib-devel freetype-devel libwebp-devel \
6363
harfbuzz-devel libraqm-devel libimagequant-devel libxcb-devel \
@@ -84,7 +84,7 @@ ARG _GLIBCXX_USE_CXX11_ABI=1
8484
ARG OPENBLAS_VERSION=0.3.30
8585

8686
RUN --mount=type=cache,target=/root/.cache/uv \
87-
source /opt/rh/gcc-toolset-13/enable && \
87+
source /opt/rh/gcc-toolset-14/enable && \
8888
git clone --recursive https://github.com/pytorch/pytorch.git -b v${TORCH_VERSION} && \
8989
cd pytorch && \
9090
uv pip install -r requirements.txt && \
@@ -97,7 +97,7 @@ ARG TORCHVISION_VERSION=0.22.0
9797
ARG TORCHVISION_USE_NVJPEG=0
9898
ARG TORCHVISION_USE_FFMPEG=0
9999
RUN --mount=type=cache,target=/root/.cache/uv \
100-
source /opt/rh/gcc-toolset-13/enable && \
100+
source /opt/rh/gcc-toolset-14/enable && \
101101
git clone --recursive https://github.com/pytorch/vision.git -b v${TORCHVISION_VERSION} && \
102102
cd vision && \
103103
MAX_JOBS=${MAX_JOBS:-$(nproc)} \
@@ -113,7 +113,7 @@ ARG USE_ROCM=0
113113
ARG USE_CUDA=0
114114
ARG TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=1
115115
RUN --mount=type=cache,target=/root/.cache/uv \
116-
source /opt/rh/gcc-toolset-13/enable && \
116+
source /opt/rh/gcc-toolset-14/enable && \
117117
git clone --recursive https://github.com/pytorch/audio.git -b v${TORCHAUDIO_VERSION} && \
118118
cd audio && \
119119
MAX_JOBS=${MAX_JOBS:-$(nproc)} \
@@ -130,7 +130,7 @@ ARG MAX_JOBS
130130
ARG PYARROW_PARALLEL
131131
ARG PYARROW_VERSION=21.0.0
132132
RUN --mount=type=cache,target=/root/.cache/uv \
133-
source /opt/rh/gcc-toolset-13/enable && \
133+
source /opt/rh/gcc-toolset-14/enable && \
134134
git clone --recursive https://github.com/apache/arrow.git -b apache-arrow-${PYARROW_VERSION} && \
135135
cd arrow/cpp && \
136136
mkdir build && cd build && \
@@ -162,7 +162,7 @@ ARG OPENCV_VERSION=86
162162
ARG OPENCV_PATCH=97f3f39
163163
ARG ENABLE_HEADLESS=1
164164
RUN --mount=type=cache,target=/root/.cache/uv \
165-
source /opt/rh/gcc-toolset-13/enable && \
165+
source /opt/rh/gcc-toolset-14/enable && \
166166
git clone --recursive https://github.com/opencv/opencv-python.git -b ${OPENCV_VERSION} && \
167167
cd opencv-python && \
168168
sed -i -E -e 's/"setuptools.+",/"setuptools",/g' pyproject.toml && \
@@ -196,7 +196,7 @@ ARG MAX_JOBS
196196
ARG NUMBA_VERSION=0.61.2
197197

198198
# Clone all required dependencies
199-
RUN dnf install ninja-build llvm15 llvm15-devel -y && source /opt/rh/gcc-toolset-13/enable && export PATH=$PATH:/usr/lib64/llvm15/bin && \
199+
RUN dnf install ninja-build llvm15 llvm15-devel -y && source /opt/rh/gcc-toolset-14/enable && export PATH=$PATH:/usr/lib64/llvm15/bin && \
200200
git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
201201
cd ./numba && \
202202
if ! grep '#include "dynamic_annotations.h"' numba/_dispatcher.cpp; then \
@@ -211,6 +211,9 @@ RUN dnf install ninja-build llvm15 llvm15-devel -y && source /opt/rh/gcc-toolset
211211

212212
FROM base-builder AS vllmcache-builder
213213

214+
ENV LLVM_CONFIG=/usr/lib64/llvm15/bin/llvm-config
215+
ENV PATH=/usr/lib64/llvm15/bin:$PATH
216+
214217
COPY --from=torch-builder /tmp/control /dev/null
215218
COPY --from=arrow-builder /tmp/control /dev/null
216219
COPY --from=cv-builder /tmp/control /dev/null
@@ -225,18 +228,21 @@ ARG GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
225228
RUN --mount=type=cache,target=/root/.cache/uv \
226229
dnf install llvm15 llvm15-devel -y && \
227230
rpm -ivh --nodeps https://mirror.stream.centos.org/9-stream/CRB/ppc64le/os/Packages/protobuf-lite-devel-3.14.0-16.el9.ppc64le.rpm && \
228-
source /opt/rh/gcc-toolset-13/enable && \
231+
source /opt/rh/gcc-toolset-14/enable && \
229232
git clone https://github.com/huggingface/xet-core.git && cd xet-core/hf_xet/ && \
230233
uv pip install maturin && \
231234
uv build --wheel --out-dir /hf_wheels/
235+
236+
ENV CXXFLAGS="-fno-lto -Wno-error=free-nonheap-object" \
237+
CFLAGS="-fno-lto"
232238
RUN --mount=type=cache,target=/root/.cache/uv \
233239
--mount=type=bind,from=torch-builder,source=/torchwheels/,target=/torchwheels/,ro \
234240
--mount=type=bind,from=arrow-builder,source=/arrowwheels/,target=/arrowwheels/,ro \
235241
--mount=type=bind,from=cv-builder,source=/opencvwheels/,target=/opencvwheels/,ro \
236242
--mount=type=bind,from=numa-builder,source=/numactl/,target=/numactl/,rw \
237243
--mount=type=bind,from=numba-builder,source=/numbawheels/,target=/numbawheels/,ro \
238244
--mount=type=bind,src=.,dst=/src/,rw \
239-
source /opt/rh/gcc-toolset-13/enable && \
245+
source /opt/rh/gcc-toolset-14/enable && \
240246
export PATH=$PATH:/usr/lib64/llvm15/bin && \
241247
uv pip install /opencvwheels/*.whl /arrowwheels/*.whl /torchwheels/*.whl /numbawheels/*.whl && \
242248
sed -i -e 's/.*torch.*//g' /src/pyproject.toml /src/requirements/*.txt && \
@@ -260,7 +266,7 @@ FROM base-builder AS lapack-builder
260266
ARG MAX_JOBS
261267
ARG LAPACK_VERSION=3.12.1
262268
RUN git clone --recursive https://github.com/Reference-LAPACK/lapack.git -b v${LAPACK_VERSION} \
263-
&& cd lapack && source /opt/rh/gcc-toolset-13/enable \
269+
&& cd lapack && source /opt/rh/gcc-toolset-14/enable \
264270
&& cmake -B build -S . \
265271
&& cmake --build build -j ${MAX_JOBS:-$(nproc)}
266272

@@ -299,7 +305,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
299305
--mount=type=bind,from=openblas-builder,source=/OpenBLAS-$OPENBLAS_VERSION/,target=/openblas/,rw \
300306
rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
301307
microdnf install --nodocs -y \
302-
libomp tar findutils openssl llvm15 llvm15-devel \
308+
libomp libicu tar findutils openssl llvm15 llvm15-devel \
303309
pkgconfig xsimd g++ gcc-fortran libsndfile \
304310
libtiff libjpeg openjpeg2 zlib zeromq \
305311
freetype lcms2 libwebp tcl tk utf8proc \

requirements/common.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ pillow # Required for image processing
1919
prometheus-fastapi-instrumentator >= 7.0.0
2020
tiktoken >= 0.6.0 # Required for DBRX tokenizer
2121
lm-format-enforcer == 0.11.3
22-
llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x"
22+
llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le"
2323
outlines_core == 0.2.11
2424
# required for outlines backend disk cache
2525
diskcache == 5.6.3
2626
lark == 1.2.2
27-
xgrammar == 0.1.27; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x"
27+
xgrammar == 0.1.27; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
2828
typing_extensions >= 4.10
2929
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
3030
partial-json-parser # used for parsing partial JSON outputs

0 commit comments

Comments
 (0)