Skip to content

Commit a22bf56

Browse files
Add ppc64le wheel build support
1 parent bd019c0 commit a22bf56

File tree

12 files changed

+517
-13
lines changed

12 files changed

+517
-13
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Build environment for PyTorch ppc64le wheels.
# Use UBI 9.3 as base image
FROM registry.access.redhat.com/ubi9/ubi:9.3

# Install necessary dependencies.
# The EPEL release package is installed first so its repository is available
# to the second transaction; `dnf clean all` runs in the same layer so the
# package metadata is not committed to the image.
# python3-pip is requested explicitly so the pip3 -> pip link created below
# always has a target.
RUN dnf install -y \
        https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
    dnf install -y \
        cargo \
        cmake \
        gcc-toolset-13 \
        git \
        ninja-build \
        python3 \
        python3-devel \
        python3-pip \
        rust \
        zip && \
    dnf clean all

# Put the gcc-toolset-13 tree on the relevant search paths for every later
# RUN step and for containers started from this image.
ENV PATH="/opt/rh/gcc-toolset-13/root/usr/bin:$PATH" \
    MANPATH="/opt/rh/gcc-toolset-13/root/usr/share/man" \
    INFOPATH="/opt/rh/gcc-toolset-13/root/usr/share/info" \
    PCP_DIR="/opt/rh/gcc-toolset-13/root" \
    LD_LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib"

# Make `python` and `pip` resolve to the system python3 / pip3
RUN ln -sf /usr/bin/python3 /usr/bin/python && \
    ln -sf /usr/bin/pip3 /usr/bin/pip

# Copy only the requirements manifest first so the dependency layer below is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt .

# Install Python packages via pip.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir wheel setuptools pyyaml typing_extensions expecttest && \
    pip install --no-cache-dir -r requirements.txt

# Copy the PyTorch source code into the container
COPY . /workspace/pytorch

WORKDIR /workspace/pytorch

# Ensure submodules are initialized and updated
# (requires the .git directory to be part of the build context).
RUN git submodule update --init --recursive

# Copy the build script, strip any Windows (CRLF) line endings and make it
# executable. This is done in a single layer with sed only, because dos2unix
# is not among the packages installed above.
COPY .github/scripts/ppc64le-build.sh /ppc64le-build.sh
RUN sed -i 's/\r$//' /ppc64le-build.sh && \
    chmod +x /ppc64le-build.sh
43+
44+

.github/scripts/ppc64le-build.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env bash
#
# Build the PyTorch wheel from /workspace/pytorch, install it, and run
# test/test_utils.py against the installed package.
#
# Environment:
#   PACKAGE_VERSION  defaults to v2.4.0 (set here; not referenced further in this script)
#   MAX_JOBS         parallel build jobs passed to setup.py, defaults to 4
#
# Exit codes:
#   0  build, install and tests succeeded
#   1  source directory missing, or build/install failed
#   2  build/install succeeded but the pytest run failed

# Environment variables
PACKAGE_NAME=pytorch
PACKAGE_VERSION=${PACKAGE_VERSION:-v2.4.0}

# Stop here if the source tree is missing: continuing would run the
# `rm -rf` below in whatever directory the script was started from.
if ! cd "/workspace/$PACKAGE_NAME"; then
    echo "------------------$PACKAGE_NAME:install_fails-------------------------------------"
    exit 1
fi

# Clean up old artifacts
rm -rf build/ dist/ torch.egg-info/

# Build and install PyTorch wheel
if ! (MAX_JOBS="${MAX_JOBS:-4}" python setup.py bdist_wheel && pip install dist/*.whl); then
    echo "------------------$PACKAGE_NAME:install_fails-------------------------------------"
    exit 1
fi

# register PrivateUse1HooksInterface
# The exit status of these individual runs is deliberately not checked;
# the overall verdict comes from the full pytest run below.
for dtype in bfloat16 float16 float32 float64; do
    python test/test_utils.py "TestDeviceUtilsCPU.test_device_mode_ops_sparse_mm_reduce_cpu_${dtype}"
done

# Run the suite from the parent directory so it is addressed by its path
# relative to /workspace.
cd ..
pip install pytest pytest-xdist

if ! pytest "$PACKAGE_NAME/test/test_utils.py"; then
    echo "------------------$PACKAGE_NAME:install_success_but_test_fails---------------------"
    exit 2
else
    echo "------------------$PACKAGE_NAME:install_and_test_both_success-------------------------"
    exit 0
fi
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Configuring the builder.
2+
3+
## Install prerequisites.
4+
5+
```
6+
Install Docker
7+
```
8+
## Clone pytorch repository
9+
10+
## Add services.
11+
12+
```
13+
$ sudo cp self-hosted-builder/*.service /etc/systemd/system/
14+
$ sudo systemctl daemon-reload
15+
```
16+
The next step is to build the `actions-runner` image:
17+
18+
```
19+
## Clone the gaplib repo (https://github.com/anup-kodlekere/gaplib.git) and copy runner-sdk-8.ppc64le.patch from gaplib/build-files into pytorch/.github/scripts/ppc64le-ci/self-hosted-builder
20+
21+
$ cd self-hosted-builder
22+
$ sudo docker build \
23+
--pull \
24+
-f actions-runner.Dockerfile \
25+
--build-arg RUNNERPATCH="runner-sdk-8.ppc64le.patch" \
26+
-t iiilinuxibmcom/actions-runner.<name> \
27+
.
28+
```
29+
30+
Now prepare all necessary files for runner registration:
31+
32+
```
33+
$ sudo mkdir -p /etc/actions-runner/<name>
34+
$ sudo chmod 755 /etc/actions-runner/<name>
35+
$ sudo /bin/cp <github_app_private_key_file> /etc/actions-runner/<name>/key_private.pem
36+
$ sudo echo <github_app_id> | sudo tee /etc/actions-runner/<name>/appid.env
37+
$ sudo echo <github_app_install_id> | sudo tee /etc/actions-runner/<name>/installid.env
38+
$ sudo echo NAME=<worker_name> | sudo tee /etc/actions-runner/<name>/env
39+
$ sudo echo OWNER=<github_owner> | sudo tee -a /etc/actions-runner/<name>/env
40+
$ sudo echo REPO=pytorch | sudo tee -a /etc/actions-runner/<name>/env
41+
$ cd self-hosted-builder
42+
$ sudo /bin/cp helpers/*.sh /usr/local/bin/
43+
$ sudo chmod 755 /usr/local/bin/app_token.sh /usr/local/bin/gh_token_generator.sh
44+
```
45+
46+
## Autostart the runner.
47+
48+
```
49+
$ sudo systemctl enable --now actions-runner@<name>
50+
```
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Self-Hosted IBM Power Github Actions Runner.
FROM ubuntu:22.04

# Run every RUN step under bash with pipefail, so a failure on the left-hand
# side of a pipeline (e.g. `curl | gpg` below) fails the build instead of
# being masked by the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Set non-interactive mode for apt
ENV DEBIAN_FRONTEND=noninteractive

# Fix sources to point to ports.ubuntu.com for ppc64le
RUN echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy main restricted universe multiverse" > /etc/apt/sources.list && \
    echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
    echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
    echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-security main restricted universe multiverse" >> /etc/apt/sources.list

# Update the package index and install base tooling.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update -o Acquire::Retries=5 -o Acquire::http::Timeout="10" && \
    apt-get -y install --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        gnupg-agent \
        iptables \
        jq \
        python3 \
        python3-pip \
        software-properties-common \
        sudo \
        vim \
        zip && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Switch to iptables-legacy
RUN update-alternatives --set iptables /usr/sbin/iptables-legacy && \
    update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy

# Add Docker GPG key and repository
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \
    echo "deb [arch=ppc64el signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list && \
    apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Install dotnet SDK and other dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        cmake \
        dotnet-sdk-8.0 \
        git \
        libtool \
        m4 \
        make \
        wget && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Setup user and permissions.
# `groupadd -f` succeeds when the group already exists, which replaces the
# former `groupadd docker || true`: in an `a && b || true && c` chain the
# `|| true` also swallowed failures of the earlier useradd/usermod/echo steps.
RUN useradd -c "Action Runner" -m runner && \
    usermod -L runner && \
    echo "runner ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/runner && \
    groupadd -f docker && \
    usermod -aG docker runner && \
    (test -S /var/run/docker.sock && chmod 660 /var/run/docker.sock && chgrp docker /var/run/docker.sock || true)

# Add and configure GitHub Actions runner
#   RUNNERREPO   git URL the runner sources are cloned from
#   RUNNERPATCH  path (inside the build context) of the patch applied to the runner
#   SDK          .NET SDK major version written into src/global.json; the
#                default matches the dotnet-sdk-8.0 package installed above.
#                It was previously referenced without being declared, which
#                rewrote the version to ".0.100".
ARG RUNNERREPO="https://github.com/actions/runner"
ARG RUNNERPATCH
ARG SDK=8

# Plain local file: COPY is sufficient, none of ADD's extra behaviour is needed.
COPY ${RUNNERPATCH} /tmp/runner.patch

# Clone, patch, build, test and install the runner in ONE layer, then remove
# the source tree and the dotnet/nuget caches in that same layer; removing
# them in a later RUN would not reduce the image size.
RUN git clone -q "${RUNNERREPO}" /tmp/runner && \
    cd /tmp/runner && \
    git checkout main -b build && \
    git apply /tmp/runner.patch && \
    sed -i -e "/version/s/8......\"\$/${SDK}.0.100\"/" src/global.json && \
    cd /tmp/runner/src && \
    ./dev.sh layout && \
    ./dev.sh package && \
    ./dev.sh test && \
    mkdir -p /opt/runner && \
    tar -xf /tmp/runner/_package/*.tar.gz -C /opt/runner && \
    chown -R runner:runner /opt/runner && \
    su - runner -c "/opt/runner/config.sh --version" && \
    cd / && \
    rm -rf /root/.dotnet /root/.nuget /tmp/runner /tmp/runner.patch

# Copy custom scripts and set permissions.
# 755: executable by the runner user, writable by root only.
COPY fs/ /
RUN chmod 755 /usr/bin/actions-runner /usr/bin/entrypoint

# Switch to the runner user
USER runner

# Set working directory
WORKDIR /opt/runner

# Define entry point and command
ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]
102+
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Template unit: %i is the instance name and selects both the configuration
# directory /etc/actions-runner/%i and the image/container actions-runner.%i.
[Unit]
Description=Self-Hosted IBM power Github Actions Runner
StartLimitIntervalSec=0

[Service]
Type=simple
Restart=always

# Cleanup stale containers
ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
# Helper scripts invoked with the per-instance credential files; the last
# argument of each is the file passed on to the next step
# (ghtoken.env, then ghtoken.txt which is mounted into the container).
ExecStartPre=-/usr/local/bin/gh_token_generator.sh /etc/actions-runner/%i/appid.env /etc/actions-runner/%i/installid.env /etc/actions-runner/%i/key_private.pem /etc/actions-runner/%i/ghtoken.env
ExecStartPre=-/usr/local/bin/gh_cat_token.sh /etc/actions-runner/%i/ghtoken.env /etc/actions-runner/%i/ghtoken.txt

ExecStart=/usr/bin/docker run \
    --env-file=/etc/actions-runner/%i/env \
    --volume /etc/actions-runner/%i/ghtoken.txt:/run/runner_secret \
    --volume /var/run/docker.sock:/var/run/docker.sock \
    --init \
    --interactive \
    --name=actions-runner.%i \
    --rm \
    --privileged \
    --log-driver=journald \
    iiilinuxibmcom/actions-runner.%i

# Stop sequence: signal the container's processes, wait for it to exit, then
# remove it. Each step is prefixed with "-" so that a failure (for example the
# container is already gone because of --rm) does not abort the remaining
# stop commands.
ExecStop=-/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
ExecStop=-/bin/sh -c "docker wait actions-runner.%i"
ExecStop=-/bin/sh -c "docker rm actions-runner.%i"

# Always remove the generated token files, including when the stop commands
# above failed or the service exited on its own.
ExecStopPost=/usr/bin/env rm -f /etc/actions-runner/%i/ghtoken.env /etc/actions-runner/%i/ghtoken.txt

[Install]
WantedBy=multi-user.target
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env bash
#
# Registers this container as an ephemeral GitHub Actions runner, runs one
# job, and unregisters the runner on exit.
#
# Required environment: OWNER, REPO, NAME.
# The GitHub access token is read from /run/runner_secret.

set -e -u

# Always run cleanup, whether the script finishes normally or aborts.
trap cleanup EXIT

token_file=registration-token.json

# Remove temporary files and unregister the runner from GitHub.
#
# Runs from the EXIT trap, so every step is best-effort: a failure here is
# reported as a warning and must not abort the handler (the script runs
# under `set -e`).
cleanup() {
    echo "Cleaning up temporary files..."
    rm -f "$token_file" "runner-id.json"

    echo "Unregistering the runner from GitHub..."
    # The trap also fires when the script exits because the secret is missing.
    if [ ! -r /run/runner_secret ]; then
        echo "Warning: /run/runner_secret is not readable; skipping unregistration."
        return 0
    fi

    ACCESS_TOKEN="$(cat /run/runner_secret)"

    # NAME is handed to jq as data (--arg) rather than spliced into the jq
    # program text. `.runners[]?` yields nothing instead of failing when the
    # response carries no runner list (e.g. an API error body).
    # per_page=100 requests the largest page the API allows.
    runner_id=$(curl -s \
        -H "Accept: application/vnd.github.v3+json" \
        -H "Authorization: Bearer ${ACCESS_TOKEN}" \
        "https://api.github.com/repos/${OWNER}/${REPO}/actions/runners?per_page=100" | \
        jq --raw-output --arg name "${NAME}" '.runners[]? | select(.name == $name) | .id') || runner_id=""

    if [ -n "$runner_id" ]; then
        # -f turns an HTTP error status into a non-zero exit so success is
        # only reported when the DELETE was accepted.
        if curl -fsS \
            -X DELETE \
            -H "Accept: application/vnd.github.v3+json" \
            -H "Authorization: Bearer ${ACCESS_TOKEN}" \
            "https://api.github.com/repos/${OWNER}/${REPO}/actions/runners/$runner_id"; then
            echo "Runner unregistered successfully."
        else
            echo "Warning: Failed to unregister runner ${NAME} (id $runner_id)."
        fi
    else
        echo "Warning: Runner ID for ${NAME} not found. It may already be removed."
    fi

    unset ACCESS_TOKEN runner_id
}
36+
37+
# Fetch GitHub access token
if [ ! -f /run/runner_secret ]; then
    echo "Error: Access token file not found at /run/runner_secret."
    exit 1
fi

ACCESS_TOKEN="$(cat /run/runner_secret)"

# Generate registration token.
# -f makes curl exit non-zero on an HTTP error status, so a rejected request
# stops the script (set -e) instead of leaving an error body in $token_file.
curl -fsS \
    -X POST \
    -H "Accept: application/vnd.github.v3+json" \
    -H "Authorization: Bearer ${ACCESS_TOKEN}" \
    "https://api.github.com/repos/${OWNER}/${REPO}/actions/runners/registration-token" \
    -o "$token_file"

unset ACCESS_TOKEN

# register runner as ephemeral runner
# it does one job, stops and unregisters
# `.token // empty` yields an empty string rather than the literal "null"
# when the response has no token field.
registration_token=$(jq --raw-output '.token // empty' "$token_file")
if [ -z "$registration_token" ]; then
    echo "Error: No registration token found in the GitHub API response."
    exit 1
fi

./config.sh \
    --unattended \
    --ephemeral \
    --url "https://github.com/${OWNER}/${REPO}" \
    --token "${registration_token}" \
    --name "${NAME}" \
    --no-default-labels \
    --labels self-hosted,linux.ppc64le

# The token is no longer needed once the runner is configured.
unset registration_token
rm -f "$token_file"

# Run one job.
./run.sh

echo "Ephemeral runner workflow completed."
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env bash

#
# Container entrypoint: runs the workload, then blocks until every process
# the workload spawned has exited.
#

set -eu

# Private scratch directory holding the FIFO; removed when this script exits.
fifo_dir=$(mktemp -d "/tmp/done.XXXXXXXXXX")
trap 'rm -r "$fifo_dir"' EXIT
fifo="$fifo_dir/pipe"
mkfifo "$fifo"

# Background reader on the FIFO's read end. It only finishes once every
# holder of the write end has closed it.
cat "$fifo" &
reader_pid=$!

# With no arguments, fall back to running bash.
[ "$#" -gt 0 ] || set -- bash

# Launch the workload with the FIFO's write end open on fd 9, so that its
# descendants inherit it. A non-zero exit is recorded, not treated as fatal.
rc=0
"$@" 9>"$fifo" || rc=$?

# Once the workload and all of its descendants are gone, the write end is
# closed and the reader exits. Waiting for it covers SelfUpdater, which the
# workload may start in the background before it exits itself.
wait "$reader_pid"

exit "$rc"

0 commit comments

Comments
 (0)