diff --git a/.buildbot/jenkins-build-llvm-project.py b/.buildbot/jenkins-build-llvm-project.py index 06c5bb115e..790ab410f7 100755 --- a/.buildbot/jenkins-build-llvm-project.py +++ b/.buildbot/jenkins-build-llvm-project.py @@ -7,8 +7,12 @@ LLVM_PROJECT_DOCKERFILE = "docker/Dockerfile.llvm-project" LLVM_PROJECT_GITHUB_URL = "https://api.github.com/repos/llvm/llvm-project" LLVM_PROJECT_BASE_IMAGE = { - "static": "ghcr.io/onnxmlir/ubuntu:jammy", - "shared": "registry.access.redhat.com/ubi8-minimal:latest", + "static": "ghcr.io/onnxmlir/ubuntu:noble-", # Will append cpu_arch + "shared": "registry.access.redhat.com/ubi9-minimal:latest", # No arch suffix needed +} +LLVM_PROJECT_BASE_IMAGE_NEEDS_ARCH = { + "static": True, + "shared": False, # UBI9 uses manifest lists, no arch-specific tags } LLVM_PROJECT_IMAGE = { "static": docker_static_image_name, @@ -168,14 +172,20 @@ def setup_per_pr_llvm_project(image_type, exp): or labels["llvm_project_sha1_date"] <= exp["llvm_project_sha1_date"] ): layer_sha256 = "" + # Conditionally append cpu_arch to base image + base_image = LLVM_PROJECT_BASE_IMAGE[image_type] + if LLVM_PROJECT_BASE_IMAGE_NEEDS_ARCH[image_type]: + base_image += cpu_arch + for line in docker_api.build( path=".", dockerfile=LLVM_PROJECT_DOCKERFILE, tag=image_full, + platform=cpu_arch, decode=True, rm=True, buildargs={ - "BASE_IMAGE": LLVM_PROJECT_BASE_IMAGE[image_type], + "BASE_IMAGE": base_image, "NPROC": NPROC, "BUILD_SHARED_LIBS": LLVM_PROJECT_BUILD_SHARED_LIBS[image_type], "LLVM_PROJECT_SHA1": exp["llvm_project_sha1"], @@ -225,6 +235,10 @@ def setup_per_pr_llvm_project(image_type, exp): def main(): exp = extract_llvm_project_info() + + # Ensure cpu_arch matches the Docker daemon/host to avoid pulling the wrong arch + resolve_and_override_cpu_arch_from_docker() + setup_per_pr_llvm_project("static", exp) setup_per_pr_llvm_project("shared", exp) diff --git a/.buildbot/jenkins-build-onnx-mlir.py b/.buildbot/jenkins-build-onnx-mlir.py index 4ff4d589d7..d92c4b6b53 100755 --- a/.buildbot/jenkins-build-onnx-mlir.py +++ b/.buildbot/jenkins-build-onnx-mlir.py @@ -163,6 +163,7 @@ def build_per_pr_onnx_mlir(image_type, exp): path=".", dockerfile=ONNX_MLIR_DOCKERFILE[image_type], tag=image_repo + ":" + image_tag, + platform="linux/" + cpu_arch, decode=True, rm=True, buildargs={ @@ -209,6 +210,9 @@ def build_per_pr_onnx_mlir(image_type, exp): def main(): + # Ensure cpu_arch matches the Docker daemon/host to avoid pulling the wrong arch + resolve_and_override_cpu_arch_from_docker() + build_per_pr_onnx_mlir("dev", get_onnx_mlir_info("dev", ".")) build_per_pr_onnx_mlir("usr", get_onnx_mlir_info("usr", ".")) diff --git a/.buildbot/jenkins_common.py b/.buildbot/jenkins_common.py index 16751d95b5..b63834a774 100755 --- a/.buildbot/jenkins_common.py +++ b/.buildbot/jenkins_common.py @@ -106,6 +106,37 @@ ] +def resolve_and_override_cpu_arch_from_docker(): + """Detect the Docker daemon/host architecture and, if different from the + current cpu_arch, override it so we pull/build the correct arch images.""" + global cpu_arch + try: + info = docker_api.info() + arch = info.get("Architecture", "").lower() + # Map common Docker Architecture values to cpu_arch tag names used by our registry + arch_map = { + "x86_64": "amd64", # Docker daemon reports x86_64, but platform needs amd64 + "amd64": "amd64", + "aarch64": "arm64", + "arm64": "arm64", + "s390x": "s390x", + "ppc64le": "ppc64le", + } + detected = arch_map.get(arch, arch) + if detected and cpu_arch != detected: + logging.info( + "Docker daemon reports Architecture=%s, overriding cpu_arch '%s' -> '%s'", + arch, + cpu_arch, + detected, + ) + cpu_arch = detected + except Exception as e: + logging.warning( + "Could not detect Docker daemon architecture to resolve cpu_arch: %s", e + ) + + def strtobool(s: str) -> bool: """Reimplement strtobool per PEP 632 and python 3.12 deprecation.""" diff --git a/CMakeLists.txt b/CMakeLists.txt index 8573332d9c..c66e18d812 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Match the minimum required version of LLVM and MLIR -cmake_minimum_required(VERSION 3.20.0) +cmake_minimum_required(VERSION 3.26.0) project(onnx-mlir) diff --git a/docker/Dockerfile.llvm-project b/docker/Dockerfile.llvm-project index 40f14a79cd..b58a618601 100644 --- a/docker/Dockerfile.llvm-project +++ b/docker/Dockerfile.llvm-project @@ -1,5 +1,5 @@ -# By default, use ubuntu:jammy, remember to change Jenkins build script as well -ARG BASE_IMAGE="ghcr.io/onnxmlir/ubuntu:jammy" +# By default, use ubuntu:noble for 24.04 +ARG BASE_IMAGE="ghcr.io/onnxmlir/ubuntu:noble" FROM ${BASE_IMAGE} # Label the image for various checking and cleanup @@ -21,6 +21,7 @@ WORKDIR ${WORK_DIR} # Install tools needed RUN distro=$(cat /etc/os-release|grep -Po '(?<=^ID=").*(?=")|(?<=^ID=)[^"].*[^"]') \ && TZ="America/New_York" \ + && ARCH=$(uname -m) \ && if [ "${distro}" = "debian" ] || [ "${distro}" = "ubuntu" ]; then \ DEBIAN_FRONTEND=noninteractive && \ apt-get update -qq && \ @@ -30,12 +31,16 @@ RUN distro=$(cat /etc/os-release|grep -Po '(?<=^ID=").*(?=")|(?<=^ID=)[^"].*[^"] apt-get install -qq -y --no-install-recommends \ autoconf automake ca-certificates clang cmake cppcheck \ curl default-jdk-headless gcc g++ git libncurses-dev \ - libtool make maven ninja-build openjdk-11-jdk-headless \ - python3 python3-dev python3-distutils python3-numpy \ + libtool make maven ninja-build \ + openjdk-11-jdk-headless openjdk-21-jdk-headless \ + python3 python3-dev python3-numpy \ python3-pip python3-pytest-xdist python3-setuptools \ - python3-typing-extensions unzip zip zlib1g-dev && \ + python3-typing-extensions unzip zip zlib1g-dev \ + # Install clang-20 for all architectures + clang-20 clang-tools-20 && \ rm -rf /var/lib/apt/lists/* && \ - ln -sf /usr/bin/pytest-3 /usr/bin/pytest; \ + rm -f /usr/lib/python*/EXTERNALLY-MANAGED && \ + ln -sf /usr/bin/pytest-3 /usr/bin/pytest || true; \ elif [ "${distro}" = "rhel" ] || [ "${distro}" = "fedora" ]; then \ ln -sf /usr/share/zoneinfo/${TZ} /etc/localtime && \ ([ -x /usr/bin/microdnf ] && microdnf install -y yum) && \ @@ -43,42 +48,64 @@ RUN distro=$(cat /etc/os-release|grep -Po '(?<=^ID=").*(?=")|(?<=^ID=)[^"].*[^"] yum install -q -y \ https://dl.fedoraproject.org/pub/epel/epel-release-latest-${RHEL_VERSION}.noarch.rpm && \ yum update -q -y && \ - yum install -q -y \ - autoconf automake ca-certificates clang cmake diffutils \ - file java-11-openjdk-devel java-11-openjdk-headless \ - gcc gcc-c++ git libtool make ncurses-devel ninja-build \ - python39 python39-devel python39-pip python39-setuptools \ - python39-wheel tzdata-java unzip which zip zlib-devel && \ + # For RHEL 9+, install python3.9 from AppStream + if [ "${RHEL_VERSION}" -ge 9 ]; then \ + yum install -q -y \ + autoconf automake ca-certificates clang cmake diffutils \ + file java-11-openjdk-devel java-11-openjdk-headless \ + java-21-openjdk-devel java-21-openjdk-headless \ + gcc gcc-c++ git libtool make ncurses-devel ninja-build \ + python3.9 python3.9-devel python3.9-pip python3.9-setuptools \ + tzdata-java unzip which zip zlib-devel && \ + # Set python3.9 as the default python3 + alternatives --set python3 /usr/bin/python3.9 || \ + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \ + # Set Java 21 as the default for runtime (needed for ONNX-MLIR JNI tests) + # Java 11 will be explicitly set via JAVA_HOME for Bazel bootstrap only + alternatives --set java java-21-openjdk.x86_64 || \ + alternatives --set java /usr/lib/jvm/java-21-openjdk-*/bin/java || \ + alternatives --auto java; \ + else \ + yum install -q -y \ + autoconf automake ca-certificates clang cmake diffutils \ + file java-21-openjdk-devel java-21-openjdk-headless \ + gcc gcc-c++ git libtool make ncurses-devel ninja-build \ + python39 python39-devel python39-pip python39-setuptools \ + python39-wheel tzdata-java unzip which zip zlib-devel; \ + fi && \ # Workaround broken ubi8 amd64 image installing python3.12 as # dependency of clang, which also breaks the /usr/bin/pip3 # symlink creation - ([ -f /usr/bin/python3.12 ] && yum remove -y python3.12 || true) && \ + ([ -f /usr/bin/python3.12 ] && [ "${RHEL_VERSION}" -lt 9 ] && yum remove -y python3.12 || true) && \ # Use same versions as those in ubuntu:jammy - pip3 install -q \ - Cython pytest==6.2.5 numpy==1.21.5 pytest-forked==1.4.0 \ - pytest-xdist==2.5.0 typing-extensions==3.10.0.2 && \ + pip3 install -q --user \ + Cython pytest==7.4.4 numpy==1.26.4 pytest-forked==1.6.0 \ + pytest-xdist==3.4.0 && \ rm -rf /var/cache/dnf/* && \ echo -e "/usr/local/lib" > /etc/ld.so.conf.d/local.conf; \ fi \ && ln -sf /usr/bin/python3 /usr/bin/python \ - && pip3 install --upgrade pip \ + && export PATH=/root/.local/bin:${PATH} \ + && python3 -m pip install --upgrade --user pip setuptools wheel \ && rm -rf ${HOME}/.cache # Install bazel ARG BAZEL_VERSION=6.5.0 ARG BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download -RUN curl -sL ${BAZEL_URL}/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-dist.zip \ - -o bazel-${BAZEL_VERSION}-dist.zip \ +RUN curl -sL ${BAZEL_URL}/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-dist.zip -o bazel-${BAZEL_VERSION}-dist.zip \ && unzip -q -d bazel-${BAZEL_VERSION} bazel-${BAZEL_VERSION}-dist.zip \ && rm -f bazel-${BAZEL_VERSION}-dist.zip \ && cd bazel-${BAZEL_VERSION} \ + # We use JDK 11 to bootstrap because it doesn't enforce module boundaries. + # This avoids the "InaccessibleObjectException" during the build-from-scratch. + && export JAVA_HOME=$(ls -d /usr/lib/jvm/java-11-openjdk-* | head -n 1) \ + && export PATH="${JAVA_HOME}/bin:${PATH}" \ && EXTRA_BAZEL_ARGS="--show_progress_rate_limit=60 \ --color=no \ --curses=yes \ --copt=-Wno-error=implicit-fallthrough \ - --host_javabase=@local_jdk//:jdk \ --verbose_failures" \ - BAZEL_JAVAC_OPTS="-J-Xmx4g -J-Xms1g" ./compile.sh \ + ./compile.sh \ && cp output/bazel /usr/local/bin \ && cd .. && rm -rf bazel-${BAZEL_VERSION} ${HOME}/.cache @@ -98,28 +125,36 @@ ARG PROTOBUF_DIR=protobuf RUN git clone -b v${PROTOBUF_VERSION} --recursive ${PROTOBUF_URL} ${PROTOBUF_DIR} \ && mkdir -p ${PROTOBUF_DIR}/build \ && cd ${PROTOBUF_DIR}/build \ - # Must specify -Dprotobuf_BUILD_TESTS=OFF otherwise find_package(absl) - # in onnx will fail due to missing protobuf::gmock target - # Must specify -DCMAKE_CXX_STANDARD=17 since protobuf requires c++17 but - # clang 14 on Ubuntu Jammy defaults to c++14 - && CC=clang CXX=clang++ \ + # Use clang-20 for all architectures + # For s390x: Clang-20 has better SystemZ backend support with improved + # handling of multiply-with-overflow operations compared to Clang-18 + # which fails on Abseil duration.cc compilation + && export CC_COMPILER="clang-20" CXX_COMPILER="clang++-20" \ + && echo "Using compiler: $CC_COMPILER (version: $($CC_COMPILER --version | head -n1))" \ + # 1. CMake Build + && CC=$CC_COMPILER CXX=$CXX_COMPILER \ cmake -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_CXX_STANDARD=17 \ -DBUILD_SHARED_LIBS=ON \ -Dprotobuf_BUILD_TESTS=OFF .. \ && make -j${NPROC} install && ldconfig \ && cd .. \ - # New version of python protobuf can no longer be built with setup.py. - # Must use bazel to build. protobuf v6.31.1 is the first version using - # rules_rust 0.56.0 which has s390x support. rules_buf still needs a - # small patch. && export CARGO_BAZEL_GENERATOR_URL=file:///root/.cargo/bin/cargo-bazel \ && export CARGO_BAZEL_REPIN=true \ - && CC=clang CXX=clang++ bazel fetch //python/dist:binary_wheel \ + # For running the installed Bazel on JDK 21, we must pass the --host_jvm_args + # to open the internal Java modules. + && BAZEL_JDK_FLAGS="--host_jvm_args=--add-opens=java.base/java.lang=ALL-UNNAMED --host_jvm_args=--add-opens=java.base/java.nio=ALL-UNNAMED --host_jvm_args=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED" \ + # 2. Bazel Fetch (Use --repo_env for repository rules like Abseil) + && bazel ${BAZEL_JDK_FLAGS} fetch \ + --repo_env=CC=$CC_COMPILER --repo_env=CXX=$CXX_COMPILER \ + //python/dist:binary_wheel \ && sed -i -e 's/\["arm64", "amd64"\]/\["arm64", "amd64", "s390x"\]/g' \ ${HOME}/.cache/bazel/_bazel_root/*/external/rules_buf/buf/internal/toolchain.bzl \ - && CC=clang CXX=clang++ bazel build //python/dist:binary_wheel \ - && pip3 install bazel-bin/python/dist/protobuf-${PROTOBUF_VERSION}-*.whl \ + # 3. Bazel Build (Use --action_env for the compilation actions) + && bazel ${BAZEL_JDK_FLAGS} build \ + --action_env=CC=$CC_COMPILER --action_env=CXX=$CXX_COMPILER \ + //python/dist:binary_wheel \ + && pip3 install --user bazel-bin/python/dist/protobuf-${PROTOBUF_VERSION}-*.whl \ && cd .. && rm -rf ${PROTOBUF_DIR} ${HOME}/.cache # Install jsoniter diff --git a/docker/Dockerfile.onnx-mlir b/docker/Dockerfile.onnx-mlir index 3753b11f1d..2a4f0cd416 100644 --- a/docker/Dockerfile.onnx-mlir +++ b/docker/Dockerfile.onnx-mlir @@ -29,9 +29,22 @@ RUN ONNX_ROOT=${WORK_DIR}/onnx-mlir/third_party/onnx \ absl::log_internal_nullguard' CMakeLists.txt \ # Required for pip install with `--no-build-isolation` flag. # setuptools >= 70.x creates conflicts with pip packaging versions. - && python3 -m pip install --upgrade setuptools==68.2.2 \ - && CC=clang CXX=clang++ CMAKE_ARGS="-DCMAKE_INSTALL_LIBDIR=lib" \ - python3 -m pip install . --no-build-isolation \ + && python3 -m pip install --upgrade --user setuptools==77.0.1 \ + # Detect architecture and set flags + # Patch setup.py to bypass the stub check on s390x + && if [ "$(uname -m)" = "s390x" ]; then \ + EXTRA_CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF"; \ + # Robustly comment out the stub assertion by searching for the error message text + sed -i '/No generated python stubs found/s/^/#/' setup.py; \ + else \ + EXTRA_CMAKE_ARGS=""; \ + fi \ + && CC=clang-20 CXX=clang++-20 \ + CMAKE_ARGS="-DCMAKE_INSTALL_LIBDIR=lib \ + -Dprotobuf_DIR=/usr/local/lib/cmake/protobuf \ + -Dabsl_DIR=/usr/local/lib/cmake/absl \ + ${EXTRA_CMAKE_ARGS}" \ + python3 -m pip install --user . \ && rm -rf ${HOME}/.cache ARG NPROC=4 @@ -51,7 +64,7 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \ && rm -rf build && mkdir -p build && cd build \ # NNPA acclerator is built on all archs to enable lit tests # (dependent libzdnn is built on s390x only) - && CC=clang CXX=clang++ \ + && CC=clang-20 CXX=clang++-20 \ cmake -DMLIR_DIR=${LLVM_PROJECT_ROOT}/build/lib/cmake/mlir \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_ASSERTIONS=ON \ @@ -96,9 +109,9 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \ elif [ "${distro}" = "rhel" ] || [ "${distro}" = "fedora" ]; then \ pip3 uninstall -q -y Cython pybind11 pytest pytest-forked \ pytest-xdist typing-extensions && \ - yum remove -q -y \ + yum remove -q -y --noautoremove \ adwaita-icon-theme autoconf automake cmake file \ - git libtool ninja-build python39 && \ + git libtool ninja-build && \ rm -rf /var/cache/dnf/*; \ fi \ && rm -rf /tmp/* /usr/bin/python ${HOME}/.cache \ diff --git a/docker/Dockerfile.onnx-mlir-dev b/docker/Dockerfile.onnx-mlir-dev index c5ce85db28..091cf7c6af 100644 --- a/docker/Dockerfile.onnx-mlir-dev +++ b/docker/Dockerfile.onnx-mlir-dev @@ -24,9 +24,23 @@ RUN ONNX_ROOT=${WORK_DIR}/onnx-mlir/third_party/onnx \ absl::log_internal_nullguard' CMakeLists.txt \ # Required for pip install with `--no-build-isolation` flag. # setuptools >= 70.x creates conflicts with pip packaging versions. - && python3 -m pip install --upgrade setuptools==68.2.2 \ - && CC=clang CXX=clang++ CMAKE_ARGS="-DCMAKE_INSTALL_LIBDIR=lib" \ - python3 -m pip install . --no-build-isolation + && python3 -m pip install --upgrade --user setuptools==77.0.1 \ + # Detect architecture and set flags + && EXTRA_CMAKE_ARGS="" \ + # Patch setup.py to bypass the stub check on s390x + && if [ "$(uname -m)" = "s390x" ]; then \ + EXTRA_CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF"; \ + # Robustly comment out the stub assertion by searching for the error message text + sed -i '/No generated python stubs found/s/^/#/' setup.py; \ + else \ + EXTRA_CMAKE_ARGS=""; \ + fi \ + && CC=clang-20 CXX=clang++-20 \ + CMAKE_ARGS="-DCMAKE_INSTALL_LIBDIR=lib \ + -Dprotobuf_DIR=/usr/local/lib/cmake/protobuf \ + -Dabsl_DIR=/usr/local/lib/cmake/absl \ + ${EXTRA_CMAKE_ARGS}" \ + python3 -m pip install --user . ARG NPROC=4 ARG ACCEL=NNPA @@ -52,7 +66,7 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \ && rm -rf build && mkdir -p build && cd build \ # NNPA acclerator is built on all archs to enable lit tests # (dependent libzdnn is built on s390x only) - && CC=clang CXX=clang++ \ + && CC=clang-20 CXX=clang++-20 \ cmake -DMLIR_DIR=${LLVM_PROJECT_ROOT}/build/lib/cmake/mlir \ -DCMAKE_BUILD_TYPE=Debug \ -DONNX_MLIR_TEST_OPTLEVEL=0 \ @@ -98,7 +112,7 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \ git remote rename origin upstream; \ fi \ # Install joblib for model zoo tests - && pip3 install -q joblib \ + && pip3 install -q --user joblib \ && rm -rf /tmp/* # Copy from onnx-mlir-dev-test to squash removed source layer and