Skip to content

Commit 2bbaf3b

Browse files
Address review comments: Fix issue with Docker image load as per review
Update runner registration flow based on feedback. Enhance security by unmounting and removing the runner token file: to prevent any potential token leakage, unmount and remove /run/runner_secret immediately after generating the registration token. This ensures that the token is inaccessible beyond its intended use, even within the job execution.
1 parent 5871a5c commit 2bbaf3b

File tree

7 files changed

+96
-122
lines changed

7 files changed

+96
-122
lines changed
Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# Use UBI 9.3 as base image
2-
FROM registry.access.redhat.com/ubi9/ubi:9.5
1+
# Use UBI 9 as base image
2+
FROM registry.access.redhat.com/ubi9/ubi:9.5 AS base
33

44
# Install necessary dependencies
55
RUN dnf install -y \
@@ -20,20 +20,12 @@ RUN ln -sf /usr/bin/python3 /usr/bin/python && \
2020

2121
COPY requirements.txt .
2222
# Install Python packages via pip
23-
RUN pip install wheel setuptools pyyaml typing_extensions expecttest
24-
25-
#RUN source /opt/rh/gcc-toolset-13/enable && pip install -r requirements.txt
23+
RUN pip install wheel
2624
RUN pip install -r requirements.txt
2725

28-
# Copy the PyTorch source code into the container
29-
COPY . /workspace/pytorch
30-
31-
WORKDIR /workspace/pytorch
32-
33-
# Ensure submodules are initialized and updated
34-
RUN git submodule update --init --recursive
26+
RUN mkdir -p /workspace/pytorch
3527

36-
# Copy the build script and make it executable
37-
COPY .github/scripts/ppc64le-build.sh /ppc64le-build.sh
28+
ENTRYPOINT []
29+
CMD ["/bin/bash"]
3830

3931

.ci/docker/manywheel/build.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ case ${GPU_ARCH_TYPE} in
6565
DOCKER_GPU_BUILD_ARG=""
6666
MANY_LINUX_VERSION="s390x"
6767
;;
68+
cpu-ppc64le)
69+
TARGET=base
70+
DOCKER_TAG=ppc64le
71+
GPU_IMAGE=redhat/ubi9
72+
DOCKER_GPU_BUILD_ARG=""
73+
MANY_LINUX_VERSION="ppc64le"
74+
;;
6875
cuda)
6976
TARGET=cuda_final
7077
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
@@ -121,7 +128,7 @@ fi
121128
(
122129
set -x
123130

124-
if [ "$(uname -m)" != "s390x" ]; then
131+
if [ "$(uname -m)" != "s390x" ] && [ "$(uname -m)" != "ppc64le" ]; then
125132
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
126133
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
127134
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service

.github/scripts/ppc64le-ci/README.md

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,30 @@
33
## Install prerequisites.
44

55
```
6-
Install Docker
6+
$ sudo dnf install podman podman-docker jq
77
```
8-
## Clone pytorch repository
9-
108
## Add services.
119

1210
```
1311
$ sudo cp self-hosted-builder/*.service /etc/systemd/system/
1412
$ sudo systemctl daemon-reload
1513
```
14+
15+
## Rebuild the image
16+
17+
First build ppc64le builder image `docker.io/pytorch/ubippc64le-builder`,
18+
using the following commands:
19+
20+
```
21+
$ cd ~
22+
$ git clone https://github.com/pytorch/pytorch
23+
$ cd pytorch
24+
$ git submodule update --init --recursive
25+
$ GPU_ARCH_TYPE=cpu-ppc64le "$(pwd)/.ci/docker/manywheel/build.sh" ubippc64le-builder
26+
$ docker image tag localhost/pytorch/ubippc64le-builder docker.io/pytorch/ubippc64le-builder:cpu-ppc64le
27+
$ docker image save -o ~/ubi-ppc64le.tar docker.io/pytorch/ubippc64le-builder:cpu-ppc64le
28+
```
29+
1630
Next step is to build `actions-runner` image using:
1731

1832
```
@@ -36,8 +50,7 @@ $ sudo /bin/cp <github_app_private_key_file> /etc/actions-runner/<name>/key_priv
3650
$ sudo echo <github_app_id> | sudo tee /etc/actions-runner/<name>/appid.env
3751
$ sudo echo <github_app_install_id> | sudo tee /etc/actions-runner/<name>/installid.env
3852
$ sudo echo NAME=<worker_name> | sudo tee /etc/actions-runner/<name>/env
39-
$ sudo echo OWNER=<github_owner> | sudo tee -a /etc/actions-runner/<name>/env
40-
$ sudo echo REPO=pytorch | sudo tee -a /etc/actions-runner/<name>/env
53+
$ sudo echo ORG=<github_org> | sudo tee -a /etc/actions-runner/<name>/env
4154
$ cd self-hosted-builder
4255
$ sudo /bin/cp helpers/*.sh /usr/local/bin/
4356
$ sudo chmod 755 /usr/local/bin/app_token.sh /usr/local/bin/gh_token_generator.sh

.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner.Dockerfile

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,8 @@ RUN update-alternatives --set iptables /usr/sbin/iptables-legacy && \
3232
update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy
3333

3434

35-
# Add Docker GPG key and repository
36-
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \
37-
echo "deb [arch=ppc64el signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list && \
38-
apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io && \
35+
# Install Podman and podman-docker (Docker compatibility)
36+
RUN apt-get update && apt-get install -y podman podman-docker && \
3937
apt-get clean && rm -rf /var/lib/apt/lists/*
4038

4139
# Install dotnet SDK and other dependencies
@@ -56,10 +54,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
5654
RUN useradd -c "Action Runner" -m runner && \
5755
usermod -L runner && \
5856
echo "runner ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/runner && \
59-
groupadd docker || true && \
60-
usermod -aG docker runner && \
61-
(test -S /var/run/docker.sock && chmod 660 /var/run/docker.sock && chgrp docker /var/run/docker.sock || true)
57+
groupadd podman || true && \
58+
usermod -aG podman runner
6259

60+
# Configure Podman cgroup manager
61+
RUN mkdir -p /etc/containers && \
62+
printf '[engine]\ncgroup_manager = "cgroupfs"\n' > /etc/containers/containers.conf
6363

6464
# Add and configure GitHub Actions runner
6565
ARG RUNNERREPO="https://github.com/actions/runner"
@@ -96,6 +96,8 @@ USER runner
9696
# Set working directory
9797
WORKDIR /opt/runner
9898

99+
COPY --chown=runner:runner pytorch-ubi-ppc64le.tar /opt/runner/pytorch-ubi-ppc64le.tar
100+
99101
# Define entry point and command
100102
ENTRYPOINT ["/usr/bin/entrypoint"]
101103
CMD ["/usr/bin/actions-runner"]

.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/actions-runner

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,15 @@
22

33
set -e -u
44

5-
trap cleanup EXIT
5+
# first import docker image
6+
if [ -f ./pytorch-ubi-ppc64le.tar ] ; then
7+
docker image load --input pytorch-ubi-ppc64le.tar
8+
docker image tag docker.io/pytorch/ubippc64le-builder:cpu-ppc64le docker.io/pytorch/ubippc64le-builder:cpu-ppc64le-main
9+
rm -f pytorch-ubi-ppc64le.tar
10+
fi
611

712
token_file=registration-token.json
813

9-
# Function to clean up and unregister the runner
10-
cleanup() {
11-
echo "Cleaning up temporary files..."
12-
[ -f "$token_file" ] && rm -f "$token_file"
13-
[ -f "runner-id.json" ] && rm -f "runner-id.json"
14-
15-
echo "Unregistering the runner from GitHub..."
16-
ACCESS_TOKEN="$(cat /run/runner_secret)"
17-
runner_id=$(curl -s \
18-
-H "Accept: application/vnd.github.v3+json" \
19-
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
20-
"https://api.github.com/repos/${OWNER}/${REPO}/actions/runners" | \
21-
jq --raw-output '.runners[] | select(.name=="'"${NAME}"'") | .id')
22-
23-
if [ -n "$runner_id" ]; then
24-
curl -s \
25-
-X DELETE \
26-
-H "Accept: application/vnd.github.v3+json" \
27-
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
28-
"https://api.github.com/repos/${OWNER}/${REPO}/actions/runners/$runner_id"
29-
echo "Runner unregistered successfully."
30-
else
31-
echo "Warning: Runner ID for ${NAME} not found. It may already be removed."
32-
fi
33-
34-
unset ACCESS_TOKEN runner_id
35-
}
36-
3714
# Fetch GitHub access token
3815
if [ ! -f /run/runner_secret ]; then
3916
echo "Error: Access token file not found at /run/runner_secret."
@@ -48,19 +25,22 @@ curl \
4825
-X POST \
4926
-H "Accept: application/vnd.github.v3+json" \
5027
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
51-
"https://api.github.com/repos/${OWNER}/${REPO}/actions/runners/registration-token" \
28+
"https://api.github.com/orgs/${ORG}/actions/runners/registration-token" \
5229
-o "$token_file"
5330

5431
unset ACCESS_TOKEN
5532

33+
sudo umount /run/runner_secret
34+
sudo rm -f /run/runner_secret
35+
5636
# register runner as ephemeral runner
5737
# it does one job, stops and unregisters
5838
registration_token=$(jq --raw-output .token "$token_file")
5939

6040
./config.sh \
6141
--unattended \
6242
--ephemeral \
63-
--url "https://github.com/${OWNER}/${REPO}" \
43+
--url "https://github.com/${ORG}" \
6444
--token "${registration_token}" \
6545
--name "${NAME}" \
6646
--no-default-labels \

.github/workflows/_linux-build.yml

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ jobs:
109109
steps:
110110
- name: Setup SSH (Click me for login details)
111111
uses: pytorch/test-infra/.github/actions/setup-ssh@main
112-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
112+
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
113113
with:
114114
github-secret: ${{ secrets.GITHUB_TOKEN }}
115115

@@ -119,17 +119,16 @@ jobs:
119119
# checkout. In other cases you should prefer a local checkout.
120120
- name: Checkout PyTorch
121121
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
122-
if: inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
123122
with:
124123
no-sudo: true
125124

126125
- name: Setup Linux
127126
uses: ./.github/actions/setup-linux
128-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
127+
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
129128

130129
- name: configure aws credentials
131130
uses: aws-actions/configure-aws-credentials@v3
132-
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9' }}
131+
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
133132
with:
134133
role-to-assume: ${{ inputs.aws-role-to-assume }}
135134
role-session-name: gha-linux-build
@@ -138,13 +137,13 @@ jobs:
138137
- name: Calculate docker image
139138
id: calculate-docker-image
140139
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
141-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
140+
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
142141
with:
143142
docker-image-name: ${{ inputs.docker-image-name }}
144143

145144
- name: Use following to pull public copy of the image
146145
id: print-ghcr-mirror
147-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
146+
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
148147
env:
149148
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
150149
shell: bash
@@ -154,26 +153,24 @@ jobs:
154153
155154
- name: Pull docker image
156155
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
157-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
156+
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
158157
with:
159158
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
160159

161160
- name: Parse ref
162161
id: parse-ref
163-
if: inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
164162
run: .github/scripts/parse_ref.py
165163

166164
- name: Get workflow job id
167165
id: get-job-id
168166
uses: ./.github/actions/get-workflow-job-id
169-
if: always() && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
167+
if: always()
170168
with:
171169
github-token: ${{ secrets.GITHUB_TOKEN }}
172170

173171
# Apply the filter logic to the build step too if the test-config label is already there
174172
- name: Select all requested test configurations (if the test matrix is available)
175173
id: filter
176-
if: inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
177174
uses: ./.github/actions/filter-test-configs
178175
with:
179176
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -184,14 +181,14 @@ jobs:
184181
- name: Download pytest cache
185182
uses: ./.github/actions/pytest-cache-download
186183
continue-on-error: true
187-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
184+
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
188185
with:
189186
cache_dir: .pytest_cache
190187
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
191188
s3_bucket: ${{ inputs.s3-bucket }}
192189

193190
- name: Build
194-
if: (steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == '') && (inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9')
191+
if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
195192
id: build
196193
env:
197194
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
@@ -278,24 +275,14 @@ jobs:
278275
END_TIME=$(date +%s)
279276
echo "build_time=$((END_TIME - START_TIME))" >> "$GITHUB_OUTPUT"
280277
281-
- name: Execute Build and Tests inside ppc64le Docker Container
282-
if: inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
283-
run: |
284-
CONTAINER_NAME="temp_builder_${RUN_ID}"
285-
docker run -d --name "$CONTAINER_NAME" pytorch-ppc64le:ubi9.3 /ppc64le-build.sh
286-
docker wait "$CONTAINER_NAME"
287-
docker logs "$CONTAINER_NAME"
288-
docker cp "$CONTAINER_NAME":/workspace/pytorch/dist/. dist/
289-
docker rm "$CONTAINER_NAME"
290-
291278
- name: Archive artifacts into zip
292279
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
293280
run: |
294281
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files
295282
296283
- name: Store PyTorch Build Artifacts on S3
297284
uses: seemethere/upload-artifact-s3@v5
298-
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
285+
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
299286
with:
300287
name: ${{ inputs.build-environment }}
301288
retention-days: 14
@@ -305,7 +292,7 @@ jobs:
305292

306293
- name: Store PyTorch Build Artifacts on S3 for split build
307294
uses: seemethere/upload-artifact-s3@v5
308-
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
295+
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
309296
with:
310297
name: ${{ inputs.build-environment }}-experimental-split-build
311298
retention-days: 14
@@ -331,35 +318,16 @@ jobs:
331318
if-no-files-found: error
332319
path: artifacts.zip
333320

334-
- name: Archive ppc64le artifacts into zip
335-
if: inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
336-
run: |
337-
zip -1 -r artifacts.zip dist/
338-
339-
340-
- name: Store PyTorch Build Artifacts for ppc64le
341-
uses: actions/upload-artifact@v4
342-
if: inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
343-
with:
344-
name: ${{ inputs.build-environment }}-ubi9
345-
retention-days: 14
346-
if-no-files-found: error
347-
path: artifacts.zip
348-
349-
- name: Cleanup dangling Docker images for ppc64le
350-
if: always() && inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
351-
run: docker image prune -f
352-
353321
- name: Upload sccache stats
354-
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
322+
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
355323
uses: ./.github/actions/upload-sccache-stats
356324
with:
357325
github-token: ${{ secrets.GITHUB_TOKEN }}
358326
build-time: ${{ steps.build.outputs.build_time }}
359327

360328
- name: Teardown Linux
361329
uses: pytorch/test-infra/.github/actions/teardown-linux@main
362-
if: always() && (inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9')
330+
if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'
363331

364332
- name: Cleanup docker
365333
if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'

0 commit comments

Comments
 (0)