Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 258 additions & 7 deletions runtimes/datascience/ubi9-python-3.12/Dockerfile.konflux.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ ARG BASE_IMAGE
####################
# cpu-base: common foundation that every later stage in this file builds FROM.
FROM ${BASE_IMAGE} AS cpu-base

# Declared inside the stage so the per-architecture checks in the RUN steps below can read it.
ARG TARGETARCH

WORKDIR /opt/app-root/bin

# OS Packages needs to be installed as root
Expand All @@ -24,7 +26,54 @@ RUN dnf -y upgrade --refresh --best --nodocs --noplugins --setopt=install_weak_d
# upgrade first to avoid fixable vulnerabilities end

# Install useful OS packages.
# perl, mesa-libGL, skopeo and libxcrypt-compat go onto every architecture;
# s390x and ppc64le additionally get the toolchains that the wheel builder
# stages further down rely on (compilers, cmake, wget/unzip, ...).
RUN --mount=type=cache,target=/var/cache/dnf \
    echo "Building for architecture: ${TARGETARCH}" && \
    PACKAGES="perl mesa-libGL skopeo libxcrypt-compat" && \
    # Additional dev tools only for s390x
    if [ "$TARGETARCH" = "s390x" ]; then \
        PACKAGES="$PACKAGES gcc gcc-c++ make openssl-devel autoconf automake libtool cmake python3-devel pybind11-devel openblas-devel unixODBC-devel openssl zlib-devel"; \
    fi && \
    # Additional dev tools only for ppc64le
    if [ "$TARGETARCH" = "ppc64le" ]; then \
        PACKAGES="$PACKAGES git gcc-toolset-13 make wget unzip rust cargo unixODBC-devel cmake ninja-build"; \
    fi && \
    echo "Installing: $PACKAGES" && \
    # $PACKAGES is deliberately unquoted: it must split into one argument per package.
    dnf install -y $PACKAGES && \
    # /var/cache/dnf is a build cache mount and is not written into the image
    # layer, so `dnf clean all` is not needed here (it would only empty the cache).
    rm -rf /var/cache/yum

# ppc64le only: append the build-related environment exports to
# /etc/profile.d/ppc64le.sh. The single quotes keep $LD_LIBRARY_PATH, $HOME and
# $PATH unexpanded, so they are written literally into the script.
RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
        printf '%s\n' \
            'export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/' \
            'export LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:$LD_LIBRARY_PATH' \
            'export OPENBLAS_VERSION=0.3.30' \
            'export ONNX_VERSION=1.19.0' \
            'export PYARROW_VERSION=17.0.0' \
            'export PATH="$HOME/.cargo/bin:$PATH"' \
            'export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1' \
            >> /etc/profile.d/ppc64le.sh; \
    fi

# s390x only: install Rust through rustup with CARGO_HOME=/opt/.cargo, hand that
# tree to UID 1001 (group 0), and record the matching environment exports in
# /etc/profile.d/cargo.sh.
RUN if [ "$TARGETARCH" = "s390x" ]; then \
        mkdir -p /opt/.cargo && \
        export HOME=/root && \
        # Download the installer to a file first, run it, then remove it again
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup-init.sh && \
        chmod +x rustup-init.sh && \
        CARGO_HOME=/opt/.cargo HOME=/root ./rustup-init.sh -y --no-modify-path && \
        rm -f rustup-init.sh && \
        chown -R 1001:0 /opt/.cargo && \
        # --no-modify-path was passed above, so PATH is set up here instead
        printf '%s\n' \
            'export PATH=/opt/.cargo/bin:$PATH' \
            'export CARGO_HOME=/opt/.cargo' \
            'export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1' \
            >> /etc/profile.d/cargo.sh; \
    fi

# s390x only (not needed for other arches): register /usr/bin/python3.12 as the
# `python` and `python3` alternatives, then print both versions as a check.
RUN case "$TARGETARCH" in \
        s390x) \
            alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
            alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
            python --version && python3 --version \
            ;; \
    esac

# Other apps and tools installed as default user
USER 1001
Expand All @@ -40,28 +89,230 @@ RUN curl -L https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/oc
rm -f /tmp/openshift-client-linux.tar.gz
# Install the oc client end

##############################
# s390x-builder stage        #
# Builds a pyarrow wheel from source into /tmp/wheels.
# NOTE: Only does real work on s390x; other arches get an empty /tmp/wheels.
##############################
FROM cpu-base AS s390x-builder

ARG TARGETARCH
USER 0
WORKDIR /tmp/build-wheels

# Build pyarrow optimized for s390x
# The cache mounts point at the directories pip and dnf actually use
# (/root/.cache/pip and /var/cache/dnf) and are never part of the image layer,
# so the package managers are left to populate them (no `dnf clean all`, no
# `--no-cache-dir`).
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/var/cache/dnf \
    if [ "$TARGETARCH" = "s390x" ]; then \
        # Install build dependencies; git is listed explicitly because the clone
        # below needs it and the s390x package set in cpu-base does not name it.
        dnf install -y cmake gcc-c++ git make pybind11-devel wget && \
        # Build and install Arrow C++ under /usr/local
        # NOTE(review): this clones the default branch, so the resulting pyarrow
        # version follows upstream HEAD - confirm whether a release tag should be pinned.
        git clone --depth 1 https://github.com/apache/arrow.git && \
        cd arrow/cpp && \
        mkdir release && cd release && \
        cmake -DCMAKE_BUILD_TYPE=Release \
              -DCMAKE_INSTALL_PREFIX=/usr/local \
              -DARROW_PYTHON=ON \
              -DARROW_PARQUET=ON \
              -DARROW_ORC=ON \
              -DARROW_FILESYSTEM=ON \
              -DARROW_JSON=ON \
              -DARROW_CSV=ON \
              -DARROW_DATASET=ON \
              -DARROW_DEPENDENCY_SOURCE=BUNDLED \
              -DARROW_WITH_LZ4=OFF \
              -DARROW_WITH_ZSTD=OFF \
              -DARROW_WITH_SNAPPY=OFF \
              -DARROW_BUILD_TESTS=OFF \
              -DARROW_BUILD_BENCHMARKS=OFF \
              .. && \
        make -j$(nproc) VERBOSE=1 && \
        make install -j$(nproc) && \
        # Build the pyarrow wheel against the Arrow C++ just installed
        cd ../../python && \
        pip install -r requirements-build.txt && \
        PYARROW_WITH_PARQUET=1 \
        PYARROW_WITH_DATASET=1 \
        PYARROW_WITH_FILESYSTEM=1 \
        PYARROW_WITH_JSON=1 \
        PYARROW_WITH_CSV=1 \
        PYARROW_PARALLEL=$(nproc) \
        python setup.py build_ext --build-type=release --bundle-arrow-cpp bdist_wheel && \
        mkdir -p /tmp/wheels && \
        cp dist/pyarrow-*.whl /tmp/wheels/ && \
        # List the collected wheels so the build log shows what was produced
        ls -la /tmp/wheels/; \
    else \
        # Create empty wheels directory for non-s390x so COPY --from still succeeds
        mkdir -p /tmp/wheels; \
    fi

###################################
# openblas builder stage for ppc64le
###################################

FROM cpu-base AS openblas-builder
USER root
WORKDIR /root

ARG TARGETARCH

ENV OPENBLAS_VERSION=0.3.30

RUN echo "openblas-builder stage TARGETARCH: ${TARGETARCH}"

# Download and build OpenBLAS (ppc64le only).
# The stage must leave exactly one /root/OpenBLAS-* entry behind, because the
# runtime stage copies that glob to /openblas: the built source tree on
# ppc64le, an empty placeholder directory on every other architecture.
RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
        source /opt/rh/gcc-toolset-13/enable && \
        wget https://github.com/OpenMathLib/OpenBLAS/releases/download/v${OPENBLAS_VERSION}/OpenBLAS-${OPENBLAS_VERSION}.zip && \
        unzip OpenBLAS-${OPENBLAS_VERSION}.zip && \
        # Remove the archive so the OpenBLAS-* glob matches only the source tree
        rm -f OpenBLAS-${OPENBLAS_VERSION}.zip && \
        cd OpenBLAS-${OPENBLAS_VERSION} && \
        make -j$(nproc) TARGET=POWER9 BINARY=64 USE_OPENMP=1 USE_THREAD=1 NUM_THREADS=120 DYNAMIC_ARCH=1 INTERFACE64=0; \
    else \
        echo "Not ppc64le, skipping OpenBLAS build" && mkdir -p /root/OpenBLAS-dummy; \
    fi

###################################
# onnx builder stage for ppc64le
###################################

FROM cpu-base AS onnx-builder
USER root
WORKDIR /root

ARG TARGETARCH
ENV ONNX_VERSION=1.19.0

RUN echo "onnx-builder stage TARGETARCH: ${TARGETARCH}"

# ppc64le: check out the v${ONNX_VERSION} tag of onnx (with submodules) and
# build its wheel into /onnx_wheels using the gcc-toolset-13 environment.
# Any other architecture only creates an empty /onnx_wheels directory.
RUN case "$TARGETARCH" in \
        ppc64le) \
            source /opt/rh/gcc-toolset-13/enable && \
            git clone --recursive https://github.com/onnx/onnx.git && \
            cd onnx && git checkout v${ONNX_VERSION} && \
            git submodule update --init --recursive && \
            pip install -r requirements.txt && \
            export CMAKE_ARGS="-DPython3_EXECUTABLE=$(which python3.12)" && \
            pip wheel . -w /onnx_wheels \
            ;; \
        *) \
            echo "Not ppc64le, skipping ONNX build" && mkdir -p /onnx_wheels \
            ;; \
    esac

###################################
# pyarrow builder stage for ppc64le
###################################

FROM cpu-base AS arrow-builder
USER root
WORKDIR /root

ARG TARGETARCH
ENV PYARROW_VERSION=17.0.0

RUN echo "arrow-builder stage TARGETARCH: ${TARGETARCH}"

# ppc64le: build Arrow C++ from the apache-arrow-${PYARROW_VERSION} tag into
# ./dist, then build a pyarrow wheel that bundles it into /arrowwheels.
# Any other architecture only creates an empty /arrowwheels directory.
# Build parallelism follows the builder's CPU count ($(nproc)) instead of a
# fixed job count.
RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
        git clone -b apache-arrow-${PYARROW_VERSION} https://github.com/apache/arrow.git --recursive && \
        cd arrow && rm -rf .git && mkdir dist && \
        pip3 install -r python/requirements-build.txt && \
        # Point the build at the local install prefix
        export ARROW_HOME=$(pwd)/dist && \
        export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH && \
        export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH && \
        export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
        export ARROW_TEST_DATA="${PWD}/testing/data" && \
        cmake -S cpp -B cpp/build \
            -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
            -DCMAKE_BUILD_TYPE=release \
            -DARROW_WITH_BZ2=ON \
            -DARROW_WITH_ZLIB=ON \
            -DARROW_WITH_ZSTD=ON \
            -DARROW_WITH_LZ4=ON \
            -DARROW_WITH_SNAPPY=ON \
            -DARROW_WITH_BROTLI=ON \
            -DARROW_DATASET=ON \
            -DARROW_FILESYSTEM=ON \
            -DARROW_COMPUTE=ON \
            -DARROW_JSON=ON \
            -DARROW_CSV=ON \
            -DARROW_PYTHON=ON \
            -DARROW_PARQUET=ON \
            -DARROW_BUILD_SHARED=ON \
            -DARROW_BUILD_TESTS=OFF && \
        cd cpp/build && \
        make -j"$(nproc)" install && \
        export PYARROW_PARALLEL="$(nproc)" && \
        export PYARROW_WITH_PARQUET=1 && \
        export PYARROW_WITH_DATASET=1 && \
        export PYARROW_BUNDLE_ARROW_CPP=1 && \
        pip3 install wheel && \
        cd ../../python && \
        python setup.py build_ext \
            --build-type=release \
            --bundle-arrow-cpp \
            bdist_wheel --dist-dir /arrowwheels; \
    else \
        echo "Not ppc64le, skipping pyarrow build" && mkdir -p /arrowwheels; \
    fi
#######################
# runtime-datascience #
#######################
FROM cpu-base AS runtime-datascience

ARG TARGETARCH
ARG DATASCIENCE_SOURCE_CODE=runtimes/datascience/ubi9-python-3.12

WORKDIR /opt/app-root/bin
USER 0

# Pull in the outputs of the ppc64le builder stages. On other architectures
# those stages only create empty directories, so these copies still succeed.
COPY --from=openblas-builder /root/OpenBLAS-* /openblas
COPY --from=onnx-builder /onnx_wheels /tmp/onnx_wheels
COPY --from=arrow-builder /arrowwheels /tmp/arrowwheels

# ppc64le: install the prebuilt ONNX and pyarrow wheels, run `make install` for
# OpenBLAS into /usr/local when /openblas has content, then remove the copies.
# Other architectures just remove the copied directories.
RUN case "$TARGETARCH" in \
        ppc64le) \
            echo "Installing ppc64le ONNX, pyarrow wheels and OpenBLAS..." && \
            HOME=/root pip install /tmp/onnx_wheels/*.whl /tmp/arrowwheels/*.whl && \
            if [ -d "/openblas" ] && [ "$(ls -A /openblas 2>/dev/null)" ]; then \
                PREFIX=/usr/local make -C /openblas install; \
            fi && \
            rm -rf /openblas /tmp/onnx_wheels /tmp/arrowwheels \
            ;; \
        *) \
            echo "Skipping architecture-specific wheel installs for (${TARGETARCH})" && \
            rm -rf /tmp/wheels /openblas /tmp/onnx_wheels /tmp/arrowwheels \
            ;; \
    esac

USER 0
# Copy wheels from build stage. The directory only contains wheels on s390x;
# on every other architecture the builder stage leaves it empty.
COPY --from=s390x-builder /tmp/wheels /tmp/wheels
# Install the wheels on s390x, then remove /tmp/wheels on every architecture so
# the copied directory is not left behind in the runtime filesystem.
RUN if [ "$TARGETARCH" = "s390x" ]; then \
        pip install --no-cache-dir /tmp/wheels/*.whl; \
    else \
        echo "Skipping wheel install for $TARGETARCH"; \
    fi && \
    rm -rf /tmp/wheels

# Install Python packages from pylock.toml
COPY ${DATASCIENCE_SOURCE_CODE}/pylock.toml ./
# Copy Elyra dependencies for air-gapped environment
COPY ${DATASCIENCE_SOURCE_CODE}/utils ./utils/

# Install the locked dependency set with uv. The uv command is the same on
# every architecture; only the build environment exported beforehand differs:
#   ppc64le - pkg-config and linker search paths that include /usr/local
#   s390x   - system OpenSSL for grpc builds and -O3 for C/C++ compilation
# NOTE(review): uv runs with --no-cache, so the pip cache mount below looks
# unused - confirm before removing it.
RUN --mount=type=cache,target=/root/.cache/pip \
    echo "Installing softwares and packages" && \
    if [ "$TARGETARCH" = "ppc64le" ]; then \
        export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig && \
        export LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:$LD_LIBRARY_PATH; \
    elif [ "$TARGETARCH" = "s390x" ]; then \
        # For s390x, we need special flags and environment variables for building packages
        export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1 CFLAGS="-O3" CXXFLAGS="-O3"; \
    fi && \
    # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`,
    # we often don't know the correct hashes and `--require-hashes` would therefore fail on non amd64, where building is common.
    uv pip install --strict --no-deps --no-cache --no-config --no-progress --verify-hashes --compile-bytecode --index-strategy=unsafe-best-match --requirements=./pylock.toml && \
    # Fix permissions to support pip in Openshift environments
    chmod -R g+w /opt/app-root/lib/python3.12/site-packages && \
    fix-permissions /opt/app-root -P

# Switch back from root (UID 0) to UID 1001 now that the install steps above are done.
USER 1001

# Working directory for the instructions that follow in this stage.
WORKDIR /opt/app-root/src

LABEL name="rhoai/odh-pipeline-runtime-datascience-cpu-py312-rhel9" \
Expand Down
Loading