diff --git a/.ci/docker/manywheel/Dockerfile_ppc64le b/.ci/docker/manywheel/Dockerfile_ppc64le
new file mode 100644
index 000000000000..441ac7d61049
--- /dev/null
+++ b/.ci/docker/manywheel/Dockerfile_ppc64le
@@ -0,0 +1,43 @@
+# Use UBI 9.5 as base image
+FROM registry.access.redhat.com/ubi9/ubi:9.5
+
+# Install build dependencies. EPEL is enabled first so that its packages are
+# available to the second install. python3-pip is listed explicitly so that
+# /usr/bin/pip3 exists for the symlink created below.
+RUN dnf install -y \
+        https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    dnf install -y git cmake ninja-build gcc-toolset-13 rust cargo zip \
+        python3 python3-devel python3-pip && \
+    dnf clean all
+
+# Put the gcc-toolset-13 compilers, libraries and docs on the default search
+# paths so the toolset does not have to be enabled in every RUN step.
+ENV PATH="/opt/rh/gcc-toolset-13/root/usr/bin:$PATH"
+ENV MANPATH="/opt/rh/gcc-toolset-13/root/usr/share/man"
+ENV INFOPATH="/opt/rh/gcc-toolset-13/root/usr/share/info"
+ENV PCP_DIR="/opt/rh/gcc-toolset-13/root"
+ENV LD_LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib"
+
+# Make `python` and `pip` resolve to the system Python 3
+RUN ln -sf /usr/bin/python3 /usr/bin/python && \
+    ln -sf /usr/bin/pip3 /usr/bin/pip
+
+# Install the Python build requirements before copying the full source tree so
+# that this layer is reused when only the sources change.
+COPY requirements.txt .
+RUN pip install --no-cache-dir wheel setuptools pyyaml typing_extensions expecttest && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Copy the PyTorch source code into the container
+COPY . /workspace/pytorch
+
+WORKDIR /workspace/pytorch
+
+# Ensure submodules are initialized and updated
+RUN git submodule update --init --recursive
+
+# Install the build script, strip CRLF line endings and make it executable.
+# dos2unix is not among the packages installed above, so sed is used directly.
+COPY .github/scripts/ppc64le-build.sh /ppc64le-build.sh
+RUN sed -i 's/\r$//' /ppc64le-build.sh && \
+    chmod +x /ppc64le-build.sh
diff --git a/.github/scripts/ppc64le-build.sh b/.github/scripts/ppc64le-build.sh
new file mode 100755
index 000000000000..72bb66a12772
--- /dev/null
+++ b/.github/scripts/ppc64le-build.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Environment variables
+PACKAGE_NAME=pytorch
+PACKAGE_VERSION=${PACKAGE_VERSION:-v2.6.0}
+
+cd "/workspace/$PACKAGE_NAME" || exit 1  # never run the rm -rf below in the wrong directory
+
+# Clean up old artifacts
+rm -rf build/ dist/ torch.egg-info/
+
+# Build and install PyTorch wheel
+if ! (MAX_JOBS=$(nproc) python setup.py bdist_wheel && pip install dist/*.whl); then
+    echo "------------------$PACKAGE_NAME:install_fails-------------------------------------"
+    exit 1
+fi
+
+# register PrivateUse1HooksInterface
+python test/test_utils.py TestDeviceUtilsCPU.test_device_mode_ops_sparse_mm_reduce_cpu_bfloat16
+python test/test_utils.py TestDeviceUtilsCPU.test_device_mode_ops_sparse_mm_reduce_cpu_float16
+python test/test_utils.py TestDeviceUtilsCPU.test_device_mode_ops_sparse_mm_reduce_cpu_float32
+python test/test_utils.py TestDeviceUtilsCPU.test_device_mode_ops_sparse_mm_reduce_cpu_float64
+
+cd ..
+pip install pytest pytest-xdist
+
+if ! pytest "$PACKAGE_NAME/test/test_utils.py"; then
+    echo "------------------$PACKAGE_NAME:install_success_but_test_fails---------------------"
+    exit 2
+
+else
+    echo "------------------$PACKAGE_NAME:install_and_test_both_success-------------------------"
+    exit 0
+fi
diff --git a/.github/scripts/ppc64le-ci/README.md b/.github/scripts/ppc64le-ci/README.md
new file mode 100644
index 000000000000..5be1405613d2
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/README.md
@@ -0,0 +1,50 @@
+# Configuring the builder.
+
+## Install prerequisites.
+
+```
+Install Docker
+```
+## Clone pytorch repository
+
+## Add services.
+
+```
+$ sudo cp self-hosted-builder/*.service /etc/systemd/system/
+$ sudo systemctl daemon-reload
+```
+Next step is to build `actions-runner` image using:
+
+```
+## clone gaplib repo (https://github.com/anup-kodlekere/gaplib.git) and copy runner-sdk-8.ppc64le.patch from gaplib/build-files into pytorch/.github/scripts/ppc64le-ci/self-hosted-builder
+
+$ cd self-hosted-builder
+$ sudo docker build \
+      --pull \
+      -f actions-runner.Dockerfile \
+      --build-arg RUNNERPATCH="runner-sdk-8.ppc64le.patch" \
+      -t iiilinuxibmcom/actions-runner.<name> \
+      .
+```
+
+Now prepare all necessary files for runner registration:
+
+```
+$ sudo mkdir -p /etc/actions-runner/<name>
+$ sudo chmod 700 /etc/actions-runner/<name>
+$ sudo /bin/cp <github_app_private_key_file> /etc/actions-runner/<name>/key_private.pem
+$ sudo echo <github_app_id> | sudo tee /etc/actions-runner/<name>/appid.env
+$ sudo echo <github_app_install_id> | sudo tee /etc/actions-runner/<name>/installid.env
+$ sudo echo NAME=<name> | sudo tee /etc/actions-runner/<name>/env
+$ sudo echo OWNER=<github_owner> | sudo tee -a /etc/actions-runner/<name>/env
+$ sudo echo REPO=pytorch | sudo tee -a /etc/actions-runner/<name>/env
+$ cd self-hosted-builder
+$ sudo /bin/cp helpers/*.sh /usr/local/bin/
+$ sudo chmod 755 /usr/local/bin/app_token.sh /usr/local/bin/gh_token_generator.sh /usr/local/bin/gh_cat_token.sh
+```
+
+## Autostart the runner.
+
+```
+$ sudo systemctl enable --now actions-runner@$NAME
+```
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner.Dockerfile b/.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner.Dockerfile
new file mode 100644
index 000000000000..c52a3c718c4a
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner.Dockerfile
@@ -0,0 +1,105 @@
+# Self-Hosted IBM Power Github Actions Runner.
+FROM ubuntu:22.04
+
+# Set non-interactive mode for apt
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Use bash with pipefail so a failing `curl` in a `curl | gpg` pipeline fails the build
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Fix sources to point to ports.ubuntu.com for ppc64le
+RUN echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy main restricted universe multiverse" > /etc/apt/sources.list && \
+    echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
+    echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
+    echo "deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports jammy-security main restricted universe multiverse" >> /etc/apt/sources.list
+
+# Update the package index and install base tooling
+RUN apt-get update -o Acquire::Retries=5 -o Acquire::http::Timeout="10" && \
+    apt-get -y install --no-install-recommends \
+    build-essential \
+    curl \
+    sudo \
+    jq \
+    gnupg-agent \
+    iptables \
+    ca-certificates \
+    software-properties-common \
+    vim \
+    zip \
+    python3 \
+    python3-pip && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Switch to iptables-legacy
+RUN update-alternatives --set iptables /usr/sbin/iptables-legacy && \
+    update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy
+
+# Add Docker GPG key and repository
+RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \
+    echo "deb [arch=ppc64el signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list && \
+    apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install dotnet SDK and other dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    git \
+    dotnet-sdk-8.0 \
+    cmake \
+    make \
+    automake \
+    autoconf \
+    m4 \
+    libtool && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Setup user and permissions. `groupadd -f` succeeds when the docker group
+# already exists, so no `|| true` is needed and a failure of an earlier
+# command in the chain is not masked.
+RUN useradd -c "Action Runner" -m runner && \
+    usermod -L runner && \
+    echo "runner ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/runner && \
+    groupadd -f docker && \
+    usermod -aG docker runner && \
+    (test -S /var/run/docker.sock && chmod 660 /var/run/docker.sock && chgrp docker /var/run/docker.sock || true)
+
+# Add and configure GitHub Actions runner
+ARG RUNNERREPO="https://github.com/actions/runner"
+ARG RUNNERPATCH
+# Major .NET SDK version substituted into src/global.json below
+ARG SDK=8
+
+COPY ${RUNNERPATCH} /tmp/runner.patch
+
+RUN git clone -q ${RUNNERREPO} /tmp/runner && \
+    cd /tmp/runner && \
+    git checkout main -b build && \
+    git apply /tmp/runner.patch && \
+    sed -i -e "/version/s/8......\"\$/${SDK}.0.100\"/" src/global.json
+
+RUN cd /tmp/runner/src && \
+    ./dev.sh layout && \
+    ./dev.sh package && \
+    ./dev.sh test && \
+    rm -rf /root/.dotnet /root/.nuget
+
+RUN mkdir -p /opt/runner && \
+    tar -xf /tmp/runner/_package/*.tar.gz -C /opt/runner && \
+    chown -R runner:runner /opt/runner && \
+    su - runner -c "/opt/runner/config.sh --version"
+
+RUN rm -rf /tmp/runner /tmp/runner.patch
+
+# Copy custom scripts and make them executable (but not world-writable)
+COPY fs/ /
+RUN chmod 755 /usr/bin/actions-runner /usr/bin/entrypoint
+
+# Switch to the runner user
+USER runner
+
+# Set working directory
+WORKDIR /opt/runner
+
+# Define entry point and command
+ENTRYPOINT ["/usr/bin/entrypoint"]
+CMD ["/usr/bin/actions-runner"]
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner@.service b/.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner@.service
new file mode 100644
index 000000000000..abbc0ffc97e5
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/actions-runner@.service
@@ -0,0 +1,36 @@
+[Unit]
+Description=Self-Hosted IBM power Github Actions Runner
+StartLimitIntervalSec=0
+
+[Service]
+Type=simple
+Restart=always
+
+# Cleanup stale containers
+ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
+ExecStartPre=-/usr/local/bin/gh_token_generator.sh /etc/actions-runner/%i/appid.env /etc/actions-runner/%i/installid.env /etc/actions-runner/%i/key_private.pem /etc/actions-runner/%i/ghtoken.env
+ExecStartPre=-/usr/local/bin/gh_cat_token.sh /etc/actions-runner/%i/ghtoken.env /etc/actions-runner/%i/ghtoken.txt
+
+ExecStart=/usr/bin/docker run \
+    --env-file=/etc/actions-runner/%i/env \
+    --volume /etc/actions-runner/%i/ghtoken.txt:/run/runner_secret \
+    --volume /var/run/docker.sock:/var/run/docker.sock \
+    --init \
+    --interactive \
+    --name=actions-runner.%i \
+    --rm \
+    --privileged \
+    --log-driver=journald \
+    iiilinuxibmcom/actions-runner.%i
+
+# Stop commands are best-effort ("-" prefix): the container is started with
+# --rm and may already be gone, and a failing command must not skip the rest.
+ExecStop=-/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
+ExecStop=-/bin/sh -c "docker wait actions-runner.%i"
+ExecStop=-/bin/sh -c "docker rm actions-runner.%i"
+
+# Remove the token files after every stop, however the service ended
+ExecStopPost=/usr/bin/env rm -f /etc/actions-runner/%i/ghtoken.env /etc/actions-runner/%i/ghtoken.txt
+
+[Install]
+WantedBy=multi-user.target
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/actions-runner b/.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/actions-runner
new file mode 100755
index 000000000000..e8c9bde29074
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/actions-runner
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+set -e -u
+
+trap cleanup EXIT
+
+token_file=registration-token.json
+
+# Clean up temporary files and unregister the runner from GitHub.
+# Runs from the EXIT trap, so every step is best-effort: errexit is switched
+# off so that one failing command does not skip the rest of the cleanup.
+cleanup() {
+    set +e
+    echo "Cleaning up temporary files..."
+    rm -f "$token_file" "runner-id.json"
+
+    if [ ! -r /run/runner_secret ]; then
+        echo "Warning: /run/runner_secret is not readable; skipping runner unregistration."
+        return
+    fi
+
+    echo "Unregistering the runner from GitHub..."
+    local access_token runner_id
+    access_token="$(cat /run/runner_secret)"
+    # NAME is passed to jq as data (--arg) instead of being spliced into the filter.
+    runner_id=$(curl -s \
+        -H "Accept: application/vnd.github.v3+json" \
+        -H "Authorization: Bearer ${access_token}" \
+        "https://api.github.com/repos/${OWNER}/${REPO}/actions/runners?per_page=100" | \
+        jq --raw-output --arg name "${NAME}" '.runners[]? | select(.name == $name) | .id')
+
+    if [ -z "$runner_id" ]; then
+        echo "Warning: Runner ID for ${NAME} not found. It may already be removed."
+    elif curl -s --fail \
+            -X DELETE \
+            -H "Accept: application/vnd.github.v3+json" \
+            -H "Authorization: Bearer ${access_token}" \
+            "https://api.github.com/repos/${OWNER}/${REPO}/actions/runners/$runner_id"; then
+        echo "Runner unregistered successfully."
+    else
+        echo "Warning: Failed to unregister runner ${NAME} (id ${runner_id})."
+    fi
+}
+
+# Fetch GitHub access token
+if [ ! -f /run/runner_secret ]; then
+    echo "Error: Access token file not found at /run/runner_secret."
+    exit 1
+fi
+
+ACCESS_TOKEN="$(cat /run/runner_secret)"
+
+# Generate registration token. --fail makes curl exit non-zero on an HTTP error
+# so `set -e` stops the script instead of continuing with an error document.
+curl \
+    --fail --silent --show-error \
+    -X POST \
+    -H "Accept: application/vnd.github.v3+json" \
+    -H "Authorization: Bearer ${ACCESS_TOKEN}" \
+    "https://api.github.com/repos/${OWNER}/${REPO}/actions/runners/registration-token" \
+    -o "$token_file"
+
+unset ACCESS_TOKEN
+
+# register runner as ephemeral runner
+# it does one job, stops and unregisters
+registration_token=$(jq --raw-output '.token // empty' "$token_file")
+if [ -z "$registration_token" ]; then
+    echo "Error: no registration token in the GitHub API response."
+    exit 1
+fi
+
+./config.sh \
+    --unattended \
+    --ephemeral \
+    --url "https://github.com/${OWNER}/${REPO}" \
+    --token "${registration_token}" \
+    --name "${NAME}" \
+    --no-default-labels \
+    --labels self-hosted,linux.ppc64le
+
+unset registration_token
+rm -f "$token_file"
+
+# Run one job.
+./run.sh
+
+echo "Ephemeral runner workflow completed."
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/entrypoint b/.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/entrypoint
new file mode 100755
index 000000000000..14f6c84ca602
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/fs/usr/bin/entrypoint
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+#
+# Container entrypoint that waits for all spawned processes.
+#
+
+set -e -u
+
+# Create a FIFO and start reading from its read end.
+tempdir=$(mktemp -d "/tmp/done.XXXXXXXXXX")
+trap 'rm -r "$tempdir"' EXIT
+done="$tempdir/pipe"
+mkfifo "$done"
+cat "$done" & waiter=$!
+
+# Start the workload. Its descendants will inherit the FIFO's write end.
+status=0
+if [ "$#" -eq 0 ]; then
+    bash 9>"$done" || status=$?
+else
+    "$@" 9>"$done" || status=$?
+fi
+
+# When the workload and all of its descendants exit, the FIFO's write end will
+# be closed and `cat "$done"` will exit. Wait until it happens. This is needed
+# in order to handle SelfUpdater, which the workload may start in background
+# before exiting.
+wait "$waiter"
+
+exit "$status"
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/app_token.sh b/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/app_token.sh
new file mode 100755
index 000000000000..cecde970b84b
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/app_token.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+#
+# Request an ACCESS_TOKEN to be used by a GitHub APP
+# Environment variables that need to be set up:
+# * APP_ID, the GitHub app's ID
+# * INSTALL_ID, the GitHub app's installation ID
+# * APP_PRIVATE_KEY, path to the GitHub app's private key file in PEM format
+#   (it is passed to `openssl dgst -sign`, which expects a key file).
+#
+# Prints the installation access token on stdout; exits non-zero when GitHub
+# does not return one.
+#
+# https://github.com/orgs/community/discussions/24743#discussioncomment-3245300
+#
+
+set -o pipefail
+
+set -e # Exit on error
+
+# Generate JWT. `iat` is set 60 seconds in the past and `exp` stays below the
+# 10 minute maximum to tolerate clock drift between this host and GitHub.
+now=$(date +%s)
+header='{"alg":"RS256","typ":"JWT"}'
+payload="{\"iat\":$(( now - 60 )),\"exp\":$(( now + 540 )),\"iss\":${APP_ID}}"
+
+header_base64=$(echo -n "$header" | openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n')
+payload_base64=$(echo -n "$payload" | openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n')
+
+signature=$(echo -n "${header_base64}.${payload_base64}" | \
+    openssl dgst -sha256 -sign "${APP_PRIVATE_KEY}" | \
+    openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n')
+
+generated_jwt="${header_base64}.${payload_base64}.${signature}"
+
+API_HEADER="Accept: application/vnd.github+json"
+
+auth_header="Authorization: Bearer ${generated_jwt}"
+
+# --fail turns an HTTP error into a non-zero exit instead of an error document
+app_installations_response=$(curl --silent --show-error --fail -X POST \
+    -H "${auth_header}" \
+    -H "${API_HEADER}" \
+    --url "https://api.github.com/app/installations/${INSTALL_ID}/access_tokens" \
+    )
+
+# `// empty` plus --exit-status: fail instead of printing the literal "null"
+echo "$app_installations_response" | jq --raw-output --exit-status '.token // empty'
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/gh_cat_token.sh b/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/gh_cat_token.sh
new file mode 100755
index 000000000000..2274e5a13c74
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/gh_cat_token.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+TOKEN_FILE=$1
+OUTPUT_FILE=$2
+
+echo "Starting gh_cat_token.sh with TOKEN_FILE=${TOKEN_FILE}, OUTPUT_FILE=${OUTPUT_FILE}"
+
+# Validate inputs
+if [[ ! -r "${TOKEN_FILE}" ]]; then
+    echo "Error: Token file '${TOKEN_FILE}' does not exist or is not readable."
+    exit 1
+fi
+
+# Write the token to the output file
+cat "${TOKEN_FILE}" > "${OUTPUT_FILE}"
+echo "Token written to ${OUTPUT_FILE}"
diff --git a/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/gh_token_generator.sh b/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/gh_token_generator.sh
new file mode 100755
index 000000000000..1feee26eb2c1
--- /dev/null
+++ b/.github/scripts/ppc64le-ci/self-hosted-builder/helpers/gh_token_generator.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+SCRIPT_DIR=$(dirname "$0")
+APP_ID=$1
+INSTALL_ID=$2
+APP_PRIVATE_KEY=$3
+DST_FILE="$4"
+
+# APP_ID and INSTALL_ID are files whose content is exported to app_token.sh;
+# APP_PRIVATE_KEY is handed over as a path. DST_FILE is not written when no
+# token could be obtained.
+if ! ACCESS_TOKEN="$(APP_ID="$(<"${APP_ID}")" INSTALL_ID="$(<"${INSTALL_ID}")" APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" "${SCRIPT_DIR}/app_token.sh")" || [ -z "${ACCESS_TOKEN}" ]; then
+    echo "Error: failed to obtain a GitHub access token." >&2
+    exit 1
+fi
+echo "${ACCESS_TOKEN}" > "${DST_FILE}"
diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
index b8045f5e20fb..fd31bb273312 100644
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@@ -109,7 +109,7 @@ jobs:
     steps:
       - name: Setup SSH (Click me for login details)
         uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           github-secret: ${{ secrets.GITHUB_TOKEN }}
 
@@ -119,16 +119,17 @@ jobs:
       # checkout. In other cases you should prefer a local checkout.
       - name: Checkout PyTorch
         uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        if: inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           no-sudo: true
 
       - name: Setup Linux
         uses: ./.github/actions/setup-linux
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
 
       - name: configure aws credentials
         uses: aws-actions/configure-aws-credentials@v3
-        if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
+        if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9' }}
         with:
           role-to-assume: ${{ inputs.aws-role-to-assume }}
           role-session-name: gha-linux-build
@@ -137,13 +138,13 @@ jobs:
       - name: Calculate docker image
         id: calculate-docker-image
         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           docker-image-name: ${{ inputs.docker-image-name }}
 
       - name: Use following to pull public copy of the image
         id: print-ghcr-mirror
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         env:
           ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
         shell: bash
@@ -153,24 +154,26 @@ jobs:
 
       - name: Pull docker image
         uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
 
       - name: Parse ref
         id: parse-ref
+        if: inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         run: .github/scripts/parse_ref.py
 
       - name: Get workflow job id
         id: get-job-id
         uses: ./.github/actions/get-workflow-job-id
-        if: always()
+        if: always() && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
 
       # Apply the filter logic to the build step too if the test-config label is already there
       - name: Select all requested test configurations (if the test matrix is available)
         id: filter
+        if: inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         uses: ./.github/actions/filter-test-configs
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -181,14 +184,14 @@ jobs:
       - name: Download pytest cache
         uses: ./.github/actions/pytest-cache-download
         continue-on-error: true
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           cache_dir: .pytest_cache
           job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
           s3_bucket: ${{ inputs.s3-bucket }}
 
       - name: Build
-        if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
+        if: (steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == '') && (inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9')
         id: build
         env:
           BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
@@ -275,6 +278,17 @@ jobs:
           END_TIME=$(date +%s)
           echo "build_time=$((END_TIME - START_TIME))" >> "$GITHUB_OUTPUT"
 
+      - name: Execute Build and Tests inside ppc64le Docker Container
+        if: inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
+        run: |
+          CONTAINER_NAME="temp_builder_${GITHUB_RUN_ID}_${GITHUB_RUN_ATTEMPT}"
+          docker run -d --name "$CONTAINER_NAME" pytorch-ppc64le:ubi9.3 /ppc64le-build.sh
+          BUILD_STATUS=$(docker wait "$CONTAINER_NAME")
+          docker logs "$CONTAINER_NAME"
+          docker cp "$CONTAINER_NAME":/workspace/pytorch/dist/. dist/
+          docker rm "$CONTAINER_NAME"
+          exit "$BUILD_STATUS"
+
       - name: Archive artifacts into zip
         if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
         run: |
@@ -282,7 +296,7 @@ jobs:
 
       - name: Store PyTorch Build Artifacts on S3
         uses: seemethere/upload-artifact-s3@v5
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           name: ${{ inputs.build-environment }}
           retention-days: 14
@@ -292,7 +306,7 @@ jobs:
 
       - name: Store PyTorch Build Artifacts on S3 for split build
         uses: seemethere/upload-artifact-s3@v5
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         with:
           name: ${{ inputs.build-environment }}-experimental-split-build
           retention-days: 14
@@ -318,8 +332,27 @@ jobs:
           if-no-files-found: error
           path: artifacts.zip
 
+      - name: Archive ppc64le artifacts into zip
+        if: inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
+        run: |
+          zip -1 -r artifacts.zip dist/
+
+
+      - name: Store PyTorch Build Artifacts for ppc64le
+        uses: actions/upload-artifact@v4
+        if: inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
+        with:
+          name: ${{ inputs.build-environment }}-ubi9
+          retention-days: 14
+          if-no-files-found: error
+          path: artifacts.zip
+
+      - name: Cleanup dangling Docker images for ppc64le
+        if: always() && inputs.build-environment == 'linux-ppc64le-binary-manywheel-ubi9'
+        run: docker image prune -f
+
       - name: Upload sccache stats
-        if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9'
         uses: ./.github/actions/upload-sccache-stats
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -327,7 +360,7 @@ jobs:
 
       - name: Teardown Linux
         uses: pytorch/test-infra/.github/actions/teardown-linux@main
-        if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: always() && (inputs.build-environment != 'linux-s390x-binary-manywheel' && inputs.build-environment != 'linux-ppc64le-binary-manywheel-ubi9')
 
       - name: Cleanup docker
         if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
diff --git a/.github/workflows/ppc64le.yml b/.github/workflows/ppc64le.yml
new file mode 100644
index 000000000000..fb26c1bfc5a3
--- /dev/null
+++ b/.github/workflows/ppc64le.yml
@@ -0,0 +1,38 @@
+name: ppc64le
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  linux-ppc64le-docker-image-build:
+    name: Build docker image for ppc64le
+    # Don't run on forked repos
+    if: github.repository_owner == 'pytorch'
+    runs-on: linux.ppc64le
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          submodules: true
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Build Docker image for ppc64le
+        run: |
+          docker build -f .ci/docker/manywheel/Dockerfile_ppc64le -t pytorch-ppc64le:ubi9.3 .
+
+  ppc64le-UBI-9-3-Build-and-Test:
+    name: ppc64le-UBI-9-3-Build-and-Test
+    uses: ./.github/workflows/_linux-build.yml
+    needs: linux-ppc64le-docker-image-build
+    with:
+      build-environment: linux-ppc64le-binary-manywheel-ubi9
+      docker-image-name: pytorch-ppc64le:ubi9.3
+      runner: linux.ppc64le