Skip to content

Commit 30747b5

Browse files
Merge pull request #2 from sandeepgupta12/temp-ppc64le-wheel-branch-v7
Temp ppc64le wheel branch v7
2 parents 3647711 + 5837c6d commit 30747b5

File tree

14 files changed

+523
-27
lines changed

14 files changed

+523
-27
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Use the manylinux_2_28 base image for ppc64le
2+
FROM quay.io/pypa/manylinux_2_28_ppc64le AS base
3+
4+
# Language variables
5+
ENV LC_ALL=C.UTF-8
6+
ENV LANG=C.UTF-8
7+
ENV LANGUAGE=C.UTF-8
8+
9+
ARG DEVTOOLSET_VERSION=13
10+
11+
# Create symbolic links for Python 3.12
12+
RUN ln -sf /opt/python/cp312-cp312/bin/python3.12 /usr/bin/python3 && \
13+
ln -sf /opt/python/cp312-cp312/bin/python3.12 /usr/bin/python
14+
15+
# Install required system dependencies
16+
RUN yum -y install epel-release && \
17+
yum -y update && \
18+
yum install -y \
19+
sudo \
20+
autoconf \
21+
automake \
22+
bison \
23+
bzip2 \
24+
curl \
25+
diffutils \
26+
file \
27+
git \
28+
make \
29+
patch \
30+
perl \
31+
unzip \
32+
util-linux \
33+
wget \
34+
which \
35+
xz \
36+
yasm \
37+
less \
38+
zstd \
39+
libgomp \
40+
gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
41+
gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
42+
gcc-toolset-${DEVTOOLSET_VERSION}-binutils \
43+
gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
44+
cmake \
45+
ninja-build \
46+
rust \
47+
cargo \
48+
llvm-devel \
49+
libzstd-devel \
50+
python3.12-devel \
51+
python3.12-setuptools \
52+
python3.12-pip \
53+
python3-virtualenv \
54+
python3.12-pyyaml \
55+
python3.12-numpy \
56+
python3.12-wheel \
57+
python3.12-cryptography \
58+
blas-devel \
59+
openblas-devel \
60+
lapack-devel \
61+
atlas-devel \
62+
libjpeg-devel \
63+
libxslt-devel \
64+
libxml2-devel \
65+
openssl-devel \
66+
valgrind && \
yum clean all
67+
68+
69+
# Ensure the correct Python version is used
70+
ENV PATH=/opt/python/cp312-cp312/bin:$PATH
71+
# Add gcc-toolset to the path
72+
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
73+
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
74+
75+
# Configure git to avoid safe directory issues
76+
RUN git config --global --add safe.directory "*"
77+
78+
# Install required Python packages
79+
RUN pip install --no-cache-dir --upgrade pip
80+
RUN pip install --no-cache-dir typing_extensions pyyaml setuptools
81+
82+
# Install test dependencies
83+
RUN dnf install -y \
84+
protobuf-devel \
85+
protobuf-c-devel \
86+
protobuf-lite-devel \
87+
wget \
88+
patch && \
dnf clean all
89+
90+
# Set default entrypoint
91+
ENTRYPOINT []
92+
CMD ["/bin/bash"]

.ci/docker/manywheel/build.sh

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ case ${GPU_ARCH_TYPE} in
6565
DOCKER_GPU_BUILD_ARG=""
6666
MANY_LINUX_VERSION="s390x"
6767
;;
68+
cpu-ppc64le)
69+
TARGET=base
70+
DOCKER_TAG=ppc64le
71+
GPU_IMAGE=redhat/ubi9
72+
DOCKER_GPU_BUILD_ARG=""
73+
MANY_LINUX_VERSION="ppc64le"
74+
;;
6875
cuda)
6976
TARGET=cuda_final
7077
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
@@ -121,8 +128,10 @@ fi
121128
(
122129
set -x
123130

124-
# Only activate this if in CI
125-
if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
131+
132+
133+
# Only activate this in CI, and never on s390x or ppc64le hosts.
# Each comparison needs its own [ ]: `&&` is not valid inside single brackets.
if [ "$(uname -m)" != "s390x" ] && [ "$(uname -m)" != "ppc64le" ] && [ -v CI ]; then
134+
126135
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
127136
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
128137
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service

.ci/docker/manywheel/build_scripts/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
2020
# the final image after compiling Python
2121
PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel"
2222

23-
if [ "$(uname -m)" != "s390x" ] ; then
23+
# s390x and ppc64le take the else branch (libdb-devel); everything else uses db4-devel.
# Each comparison needs its own [ ]: `&&` is not valid inside single brackets.
if [ "$(uname -m)" != "s390x" ] && [ "$(uname -m)" != "ppc64le" ] ; then
2424
PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} db4-devel"
2525
else
2626
PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} libdb-devel"

.ci/pytorch/build.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ fi
231231

232232
# Do not change workspace permissions for ROCm and s390x CI jobs
233233
# as it can leave workspace with bad permissions for cancelled jobs
234-
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
234+
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *ppc64le* && -d /var/lib/jenkins/workspace ]]; then
235235
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
236236
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
237237
cleanup_workspace() {
@@ -275,8 +275,10 @@ else
275275
# XLA test build fails when WERROR=1
276276
# set only when building other architectures
277277
# or building non-XLA tests.
278+
# ppc64le builds fail when WERROR=1
278279
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
279-
"$BUILD_ENVIRONMENT" != *xla* ]]; then
280+
"$BUILD_ENVIRONMENT" != *xla* &&
281+
"$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
280282
if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
281283
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
282284
python -mpip install numpy==2.0.2
@@ -399,6 +401,6 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
399401
python tools/stats/export_test_times.py
400402
fi
401403
# don't do this for bazel or s390x as they don't use sccache
402-
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
404+
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *ppc64le* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
403405
print_sccache_stats
404406
fi
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Configuring the builder.
2+
3+
## Install prerequisites.
4+
5+
```
6+
$ sudo apt install podman podman-docker jq
7+
```
8+
## Add services.
9+
10+
```
11+
$ sudo cp self-hosted-builder/*.service /etc/systemd/system/
12+
$ sudo systemctl daemon-reload
13+
```
14+
15+
## Rebuild the image
16+
17+
First build ppc64le builder image `docker.io/pytorch/manylinuxppc64le-builder`,
18+
using the following commands:
19+
20+
```
21+
$ cd ~
22+
$ git clone https://github.com/pytorch/pytorch
23+
$ cd pytorch
24+
$ git submodule update --init --recursive
25+
$ GPU_ARCH_TYPE=cpu-ppc64le "$(pwd)/.ci/docker/manywheel/build.sh" manylinuxppc64le-builder
26+
$ docker image tag localhost/pytorch/manylinuxppc64le-builder docker.io/pytorch/manylinuxppc64le-builder:cpu-ppc64le
27+
$ docker image save -o ~/manywheel-ppc64le.tar docker.io/pytorch/manylinuxppc64le-builder:cpu-ppc64le
28+
```
29+
30+
The next step is to build the `actions-runner` image:
31+
32+
```
33+
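## Clone the gaplib repo (https://github.com/anup-kodlekere/gaplib.git) and copy runner-sdk-8.ppc64le.patch from gaplib/build-files into pytorch/.github/scripts/ppc64le-ci/self-hosted-builder
## Also copy ~/manywheel-ppc64le.tar (saved in the previous step) into that directory: the Dockerfile copies it from the build context.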
34+
35+
$ cd self-hosted-builder
36+
$ sudo docker build \
37+
--pull \
38+
-f actions-runner.Dockerfile \
39+
--build-arg RUNNERPATCH="runner-sdk-8.ppc64le.patch" \
40+
-t iiilinuxibmcom/actions-runner.<name> \
41+
.
42+
```
43+
44+
Now prepare all necessary files for runner registration:
45+
46+
```
47+
$ sudo mkdir -p /etc/actions-runner/<name>
48+
$ sudo chmod 755 /etc/actions-runner/<name>
49+
$ sudo /bin/cp <github_app_private_key_file> /etc/actions-runner/<name>/key_private.pem
50+
$ sudo echo <github_app_id> | sudo tee /etc/actions-runner/<name>/appid.env
51+
$ sudo echo <github_app_install_id> | sudo tee /etc/actions-runner/<name>/installid.env
52+
$ sudo echo NAME=<worker_name> | sudo tee /etc/actions-runner/<name>/env
53+
$ sudo echo ORG=<github_org> | sudo tee -a /etc/actions-runner/<name>/env
54+
$ cd self-hosted-builder
55+
$ sudo /bin/cp helpers/*.sh /usr/local/bin/
56+
$ sudo chmod 755 /usr/local/bin/app_token.sh /usr/local/bin/gh_token_generator.sh
57+
```
58+
59+
## Autostart the runner.
60+
61+
```
62+
$ sudo systemctl enable --now actions-runner@<name>
63+
```
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Self-Hosted IBM Power Github Actions Runner.
2+
FROM ubuntu:22.04
3+
4+
# Set non-interactive mode for apt
5+
ENV DEBIAN_FRONTEND=noninteractive
6+
7+
# Fix sources to point to ports.ubuntu.com for ppc64le
8+
RUN echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy main restricted universe multiverse" > /etc/apt/sources.list && \
9+
echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
10+
echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
11+
echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-security main restricted universe multiverse" >> /etc/apt/sources.list
12+
13+
# Fix sources for ppc64le and update system
14+
RUN apt-get update -o Acquire::Retries=5 -o Acquire::http::Timeout="10" && \
15+
apt-get -y install --no-install-recommends \
16+
build-essential \
17+
curl \
18+
sudo \
19+
jq \
20+
gnupg-agent \
21+
iptables \
22+
ca-certificates \
23+
software-properties-common \
24+
vim \
25+
zip \
26+
python3 \
27+
python3-pip && \
28+
apt-get clean && rm -rf /var/lib/apt/lists/*
29+
30+
# Switch to iptables-legacy
31+
RUN update-alternatives --set iptables /usr/sbin/iptables-legacy && \
32+
update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy
33+
34+
35+
# Install Podman and podman-docker (Docker compatibility)
36+
RUN apt-get update && apt-get install -y podman podman-docker && \
37+
apt-get clean && rm -rf /var/lib/apt/lists/*
38+
39+
# Install dotnet SDK and other dependencies
40+
RUN apt-get update && apt-get install -y --no-install-recommends \
41+
wget \
42+
git \
43+
dotnet-sdk-8.0 \
44+
cmake \
45+
make \
46+
automake \
47+
autoconf \
48+
m4 \
49+
libtool && \
50+
apt-get clean && rm -rf /var/lib/apt/lists/*
51+
52+
53+
# Setup user and permissions
54+
RUN useradd -c "Action Runner" -m runner && \
55+
usermod -L runner && \
56+
echo "runner ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/runner && \
57+
groupadd -f podman && \
58+
usermod -aG podman runner
59+
60+
# Configure Podman cgroup manager
61+
RUN mkdir -p /etc/containers && \
62+
printf '[engine]\ncgroup_manager = "cgroupfs"\n' > /etc/containers/containers.conf
63+
64+
# Add and configure GitHub Actions runner
65+
ARG RUNNERREPO="https://github.com/actions/runner"
66+
ARG RUNNERPATCH
67+
68+
ADD ${RUNNERPATCH} /tmp/runner.patch
69+
70+
RUN git clone -q ${RUNNERREPO} /tmp/runner && \
71+
cd /tmp/runner && \
72+
git checkout main -b build && \
73+
git apply /tmp/runner.patch && \
74+
sed -i'' -e /version/s/8......\"$/${SDK:-8}.0.100\"/ src/global.json
75+
76+
RUN cd /tmp/runner/src && \
77+
./dev.sh layout && \
78+
./dev.sh package && \
79+
./dev.sh test && \
80+
rm -rf /root/.dotnet /root/.nuget
81+
82+
RUN mkdir -p /opt/runner && \
83+
tar -xf /tmp/runner/_package/*.tar.gz -C /opt/runner && \
84+
chown -R runner:runner /opt/runner && \
85+
su - runner -c "/opt/runner/config.sh --version"
86+
87+
RUN rm -rf /tmp/runner /tmp/runner.patch
88+
89+
# Copy custom scripts and set permissions
90+
COPY fs/ /
91+
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint
92+
93+
# Switch to the runner user
94+
USER runner
95+
96+
# Set working directory
97+
WORKDIR /opt/runner
98+
99+
COPY --chown=runner:runner manywheel-ppc64le.tar /opt/runner/manywheel-ppc64le.tar
100+
101+
# Define entry point and command
102+
ENTRYPOINT ["/usr/bin/entrypoint"]
103+
CMD ["/usr/bin/actions-runner"]
104+
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
[Unit]
2+
Description=Self-Hosted IBM power Github Actions Runner
3+
StartLimitIntervalSec=0
4+
5+
[Service]
6+
Type=simple
7+
Restart=always
8+
9+
# Cleanup stale containers
10+
ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
11+
ExecStartPre=-/usr/local/bin/gh_token_generator.sh /etc/actions-runner/%i/appid.env /etc/actions-runner/%i/installid.env /etc/actions-runner/%i/key_private.pem /etc/actions-runner/%i/ghtoken.env
12+
ExecStartPre=-/usr/local/bin/gh_cat_token.sh /etc/actions-runner/%i/ghtoken.env /etc/actions-runner/%i/ghtoken.txt
13+
14+
ExecStart=/usr/bin/docker run \
15+
--env-file=/etc/actions-runner/%i/env \
16+
--volume /etc/actions-runner/%i/ghtoken.txt:/run/runner_secret \
17+
--init \
18+
--interactive \
19+
--name=actions-runner.%i \
20+
--rm \
21+
--privileged \
22+
--log-driver=journald \
23+
iiilinuxibmcom/actions-runner.%i
24+
ExecStop=/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
25+
ExecStop=/bin/sh -c "docker wait actions-runner.%i"
26+
ExecStop=/bin/sh -c "docker rm actions-runner.%i"
27+
28+
ExecStop=/usr/bin/env rm -f /etc/actions-runner/%i/ghtoken.env /etc/actions-runner/%i/ghtoken.txt
29+
30+
[Install]
31+
WantedBy=multi-user.target

0 commit comments

Comments
 (0)