diff --git a/runtimes/datascience/ubi9-python-3.12/Dockerfile.konflux.cpu b/runtimes/datascience/ubi9-python-3.12/Dockerfile.konflux.cpu
index 0f36aac10b..08269e7f2f 100644
--- a/runtimes/datascience/ubi9-python-3.12/Dockerfile.konflux.cpu
+++ b/runtimes/datascience/ubi9-python-3.12/Dockerfile.konflux.cpu
@@ -8,6 +8,8 @@ ARG BASE_IMAGE
 ####################
 FROM ${BASE_IMAGE} AS cpu-base
 
+ARG TARGETARCH
+
 WORKDIR /opt/app-root/bin
 
 # OS Packages needs to be installed as root
@@ -24,7 +26,54 @@ RUN dnf -y upgrade --refresh --best --nodocs --noplugins --setopt=install_weak_d
 # upgrade first to avoid fixable vulnerabilities end
 
 # Install useful OS packages
-RUN dnf install -y perl mesa-libGL skopeo libxcrypt-compat && dnf clean all && rm -rf /var/cache/yum
+RUN --mount=type=cache,target=/var/cache/dnf \
+    echo "Building for architecture: ${TARGETARCH}" && \
+    PACKAGES="perl mesa-libGL skopeo libxcrypt-compat" && \
+    # Additional dev tools only for s390x
+    if [ "$TARGETARCH" = "s390x" ]; then \
+        PACKAGES="$PACKAGES gcc gcc-c++ make openssl-devel autoconf automake libtool cmake python3-devel pybind11-devel openblas-devel unixODBC-devel openssl zlib-devel"; \
+    fi && \
+    if [ "$TARGETARCH" = "ppc64le" ]; then \
+        PACKAGES="$PACKAGES git gcc-toolset-13 make wget unzip rust cargo unixODBC-devel cmake ninja-build"; \
+    fi && \
+    if [ -n "$PACKAGES" ]; then \
+        echo "Installing: $PACKAGES" && \
+        dnf install -y $PACKAGES && \
+        dnf clean all && rm -rf /var/cache/yum; \
+    fi
+
+RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
+    echo 'export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/' >> /etc/profile.d/ppc64le.sh && \
+    echo 'export LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:$LD_LIBRARY_PATH' >> /etc/profile.d/ppc64le.sh && \
+    echo 'export OPENBLAS_VERSION=0.3.30' >> /etc/profile.d/ppc64le.sh && \
+    echo 'export ONNX_VERSION=1.19.0' >> /etc/profile.d/ppc64le.sh && \
+    echo 'export PYARROW_VERSION=17.0.0' >> /etc/profile.d/ppc64le.sh && \
+    echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> /etc/profile.d/ppc64le.sh && \
+    echo 'export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1' >> /etc/profile.d/ppc64le.sh; \
+    fi
+
+# For s390x only, set ENV vars and install Rust
+RUN if [ "$TARGETARCH" = "s390x" ]; then \
+    # Install Rust and set up environment
+    mkdir -p /opt/.cargo && \
+    export HOME=/root && \
+    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup-init.sh && \
+    chmod +x rustup-init.sh && \
+    CARGO_HOME=/opt/.cargo HOME=/root ./rustup-init.sh -y --no-modify-path && \
+    rm -f rustup-init.sh && \
+    chown -R 1001:0 /opt/.cargo && \
+    # Set environment variables
+    echo 'export PATH=/opt/.cargo/bin:$PATH' >> /etc/profile.d/cargo.sh && \
+    echo 'export CARGO_HOME=/opt/.cargo' >> /etc/profile.d/cargo.sh && \
+    echo 'export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1' >> /etc/profile.d/cargo.sh; \
+fi
+
+# Set python alternatives only for s390x (not needed for other arches)
+RUN if [ "$TARGETARCH" = "s390x" ]; then \
+    alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
+    alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
+    python --version && python3 --version; \
+fi
 
 # Other apps and tools installed as default user
 USER 1001
@@ -40,28 +89,230 @@ RUN curl -L https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/oc
     rm -f /tmp/openshift-client-linux.tar.gz
 # Install the oc client end
 
+##############################
+# wheel-builder stage #
+# NOTE: Only used in s390x
+##############################
+FROM cpu-base AS s390x-builder
+
+ARG TARGETARCH
+USER 0
+WORKDIR /tmp/build-wheels
+
+# Build pyarrow optimized for s390x
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/dnf \
+    if [ "$TARGETARCH" = "s390x" ]; then \
+        # Install build dependencies (shared for pyarrow and onnx)
+        dnf install -y cmake make gcc-c++ pybind11-devel wget && \
+        dnf clean all && \
+        # Build and collect pyarrow wheel
+        git clone --depth 1 https://github.com/apache/arrow.git && \
+        cd arrow/cpp && \
+        mkdir release && cd release && \
+        cmake -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_INSTALL_PREFIX=/usr/local \
+            -DARROW_PYTHON=ON \
+            -DARROW_PARQUET=ON \
+            -DARROW_ORC=ON \
+            -DARROW_FILESYSTEM=ON \
+            -DARROW_JSON=ON \
+            -DARROW_CSV=ON \
+            -DARROW_DATASET=ON \
+            -DARROW_DEPENDENCY_SOURCE=BUNDLED \
+            -DARROW_WITH_LZ4=OFF \
+            -DARROW_WITH_ZSTD=OFF \
+            -DARROW_WITH_SNAPPY=OFF \
+            -DARROW_BUILD_TESTS=OFF \
+            -DARROW_BUILD_BENCHMARKS=OFF \
+            .. && \
+        make -j$(nproc) VERBOSE=1 && \
+        make install -j$(nproc) && \
+        cd ../../python && \
+        pip install --no-cache-dir -r requirements-build.txt && \
+        PYARROW_WITH_PARQUET=1 \
+        PYARROW_WITH_DATASET=1 \
+        PYARROW_WITH_FILESYSTEM=1 \
+        PYARROW_WITH_JSON=1 \
+        PYARROW_WITH_CSV=1 \
+        PYARROW_PARALLEL=$(nproc) \
+        python setup.py build_ext --build-type=release --bundle-arrow-cpp bdist_wheel && \
+        mkdir -p /tmp/wheels && \
+        cp dist/pyarrow-*.whl /tmp/wheels/ && \
+        # Ensure wheels directory exists and has content
+        ls -la /tmp/wheels/; \
+    else \
+        # Create empty wheels directory for non-s390x
+        mkdir -p /tmp/wheels; \
+    fi
+
+###################################
+# openblas builder stage for ppc64le
+##################################
+
+FROM cpu-base AS openblas-builder
+USER root
+WORKDIR /root
+
+ARG TARGETARCH
+
+ENV OPENBLAS_VERSION=0.3.30
+
+RUN echo "openblas-builder stage TARGETARCH: ${TARGETARCH}"
+
+# Download and build OpenBLAS
+RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
+    source /opt/rh/gcc-toolset-13/enable && \
+    wget https://github.com/OpenMathLib/OpenBLAS/releases/download/v${OPENBLAS_VERSION}/OpenBLAS-${OPENBLAS_VERSION}.zip && \
+    unzip OpenBLAS-${OPENBLAS_VERSION}.zip && cd OpenBLAS-${OPENBLAS_VERSION} && \
+    make -j$(nproc) TARGET=POWER9 BINARY=64 USE_OPENMP=1 USE_THREAD=1 NUM_THREADS=120 DYNAMIC_ARCH=1 INTERFACE64=0; \
+    else \
+    echo "Not ppc64le, skipping OpenBLAS build" && mkdir -p /root/OpenBLAS-dummy; \
+    fi
+
+###################################
+# onnx builder stage for ppc64le
+###################################
+
+FROM cpu-base AS onnx-builder
+USER root
+WORKDIR /root
+
+ARG TARGETARCH
+ENV ONNX_VERSION=1.19.0
+
+RUN echo "onnx-builder stage TARGETARCH: ${TARGETARCH}"
+
+RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
+    source /opt/rh/gcc-toolset-13/enable && \
+    git clone --recursive https://github.com/onnx/onnx.git && \
+    cd onnx && git checkout v${ONNX_VERSION} && \
+    git submodule update --init --recursive && \
+    pip install -r requirements.txt && \
+    export CMAKE_ARGS="-DPython3_EXECUTABLE=$(which python3.12)" && \
+    pip wheel . -w /onnx_wheels; \
+    else \
+    echo "Not ppc64le, skipping ONNX build" && mkdir -p /onnx_wheels; \
+    fi
+
+###################################
+# pyarrow builder stage for ppc64le
+##################################
+
+FROM cpu-base AS arrow-builder
+USER root
+WORKDIR /root
+
+ARG TARGETARCH
+ENV PYARROW_VERSION=17.0.0
+
+RUN echo "arrow-builder stage TARGETARCH: ${TARGETARCH}"
+
+RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
+    git clone -b apache-arrow-${PYARROW_VERSION} https://github.com/apache/arrow.git --recursive && \
+    cd arrow && rm -rf .git && mkdir dist && \
+    pip3 install -r python/requirements-build.txt && \
+    export ARROW_HOME=$(pwd)/dist && \
+    export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH && \
+    export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH && \
+    export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
+    export ARROW_TEST_DATA="${PWD}/testing/data" && \
+    cmake -S cpp -B cpp/build \
+        -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
+        -DCMAKE_BUILD_TYPE=release \
+        -DARROW_WITH_BZ2=ON \
+        -DARROW_WITH_ZLIB=ON \
+        -DARROW_WITH_ZSTD=ON \
+        -DARROW_WITH_LZ4=ON \
+        -DARROW_WITH_SNAPPY=ON \
+        -DARROW_WITH_BROTLI=ON \
+        -DARROW_DATASET=ON \
+        -DARROW_FILESYSTEM=ON \
+        -DARROW_COMPUTE=ON \
+        -DARROW_JSON=ON \
+        -DARROW_CSV=ON \
+        -DARROW_PYTHON=ON \
+        -DARROW_PARQUET=ON \
+        -DARROW_BUILD_SHARED=ON \
+        -DARROW_BUILD_TESTS=OFF && \
+    cd cpp/build && \
+    make -j20 install && \
+    export PYARROW_PARALLEL=20 && \
+    export PYARROW_WITH_PARQUET=1 && \
+    export PYARROW_WITH_DATASET=1 && \
+    export PYARROW_BUNDLE_ARROW_CPP=1 && \
+    pip3 install wheel && \
+    cd ../../python && \
+    python setup.py build_ext \
+        --build-type=release \
+        --bundle-arrow-cpp \
+        bdist_wheel --dist-dir /arrowwheels; \
+    else \
+    echo "Not ppc64le, skipping pyarrow build" && mkdir -p /arrowwheels; \
+    fi
 #######################
 # runtime-datascience #
 #######################
 FROM cpu-base AS runtime-datascience
 
+ARG TARGETARCH
 ARG DATASCIENCE_SOURCE_CODE=runtimes/datascience/ubi9-python-3.12
 
 WORKDIR /opt/app-root/bin
+USER 0
+
+# Install ppc64le-built wheels if available
+COPY --from=openblas-builder /root/OpenBLAS-* /openblas
+COPY --from=onnx-builder /onnx_wheels /tmp/onnx_wheels
+COPY --from=arrow-builder /arrowwheels /tmp/arrowwheels
 
-# Install Python packages from requirements.txt
+RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
+    echo "Installing ppc64le ONNX, pyarrow wheels and OpenBLAS..." && \
+    HOME=/root pip install /tmp/onnx_wheels/*.whl /tmp/arrowwheels/*.whl && \
+    if [ -d "/openblas" ] && [ "$(ls -A /openblas 2>/dev/null)" ]; then \
+        PREFIX=/usr/local make -C /openblas install; \
+    fi && rm -rf /openblas /tmp/onnx_wheels /tmp/arrowwheels; \
+    else \
+    echo "Skipping architecture-specific wheel installs for ${TARGETARCH}" && \
+    rm -rf /tmp/wheels /openblas /tmp/onnx_wheels /tmp/arrowwheels; \
+    fi
+
+USER 0
+# Copy wheels from build stage (s390x only)
+COPY --from=s390x-builder /tmp/wheels /tmp/wheels
+RUN if [ "$TARGETARCH" = "s390x" ]; then \
+    pip install --no-cache-dir /tmp/wheels/*.whl && rm -rf /tmp/wheels; \
+else \
+    echo "Skipping wheel install for $TARGETARCH"; \
+fi
+
+# Install Python packages from pylock.toml
 COPY ${DATASCIENCE_SOURCE_CODE}/pylock.toml ./
 # Copy Elyra dependencies for air-gapped enviroment
 COPY ${DATASCIENCE_SOURCE_CODE}/utils ./utils/
 
-RUN echo "Installing softwares and packages" && \
-    # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`,
-    # we often don't know the correct hashes and `--require-hashes` would therefore fail on non amd64, where building is common.
-    uv pip install --strict --no-deps --no-cache --no-config --no-progress --verify-hashes --compile-bytecode --index-strategy=unsafe-best-match --requirements=./pylock.toml && \
-    # Fix permissions to support pip in Openshift environments \
+RUN --mount=type=cache,target=/root/.cache/pip \
+    echo "Installing software and packages" && \
+    if [ "$TARGETARCH" = "ppc64le" ]; then \
+        export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig; \
+        export LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:$LD_LIBRARY_PATH && \
+        uv pip install --strict --no-deps --no-cache --no-config --no-progress --verify-hashes --compile-bytecode --index-strategy=unsafe-best-match --requirements=./pylock.toml; \
+    elif [ "$TARGETARCH" = "s390x" ]; then \
+        # For s390x, we need special flags and environment variables for building packages
+        GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1 \
+        CFLAGS="-O3" CXXFLAGS="-O3" \
+        uv pip install --strict --no-deps --no-cache --no-config --no-progress --verify-hashes --compile-bytecode --index-strategy=unsafe-best-match --requirements=./pylock.toml; \
+    else \
+        # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`,
+        # we often don't know the correct hashes and `--require-hashes` would therefore fail on non amd64, where building is common.
+        uv pip install --strict --no-deps --no-cache --no-config --no-progress --verify-hashes --compile-bytecode --index-strategy=unsafe-best-match --requirements=./pylock.toml; \
+    fi && \
+    # Fix permissions to support pip in Openshift environments
    chmod -R g+w /opt/app-root/lib/python3.12/site-packages && \
     fix-permissions /opt/app-root -P
 
+USER 1001
+
 WORKDIR /opt/app-root/src
 
 LABEL name="rhoai/odh-pipeline-runtime-datascience-cpu-py312-rhel9" \
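
Usage note (not part of the patch): BuildKit populates the declared ARG TARGETARCH from the target platform (the build host's architecture by default), so the ppc64le and s390x branches above are exercised by passing an explicit --platform. A minimal sketch of a local cross-arch build, assuming a buildx-capable Docker; the tag and base-image value are illustrative placeholders:

    docker buildx build \
        --platform linux/ppc64le \
        -f runtimes/datascience/ubi9-python-3.12/Dockerfile.konflux.cpu \
        --build-arg BASE_IMAGE=<cpu-base-image> \
        -t odh-pipeline-runtime-datascience-cpu-py312:ppc64le .

With --platform linux/s390x the s390x-builder wheel stage runs instead; on amd64/arm64 all conditional blocks fall through to the plain uv pip install path.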