diff --git a/.github/actions/dependencies-action/action.yml b/.github/actions/dependencies-action/action.yml deleted file mode 100644 index 051472c..0000000 --- a/.github/actions/dependencies-action/action.yml +++ /dev/null @@ -1,92 +0,0 @@ -# An action for installing dependencies. You can set -# custom pip dependencies as following: -# -# Usage: -# -# - name: Install dependencies -# uses: ./.github/actions/dependencies-action -# with: -# pip_packages: | -# - torch -# - triton -# pip_requirements: | -# - /path/a/requirements.txt -# - /path/b/requirements.txt --no-deps - -name: Install dependencies -description: Install OS and pip dependencies - -inputs: - pip_packages: - description: List of pip packages to install - required: false - default: '' - pip_requirements: - description: List of requirements files to install - required: false - default: '' - -runs: - using: composite - steps: - # TODO(shink): Remove this step after building devel images - - name: Install system dependencies - shell: bash - env: - DEBIAN_FRONTEND: noninteractive - run: | - echo "::group::Install system dependencies" - sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list - apt update - apt install --no-install-recommends -y \ - git \ - gcc-10 \ - g++-10 \ - make \ - cmake \ - ninja-build - echo "::endgroup::" - - - name: Create symlinks for gcc and g++ - shell: bash - run: | - ln -s /usr/bin/gcc-10 /usr/bin/gcc - ln -s /usr/bin/g++-10 /usr/bin/g++ - - - name: Show versions - shell: bash - run: | - set -x - python --version - pip --version - gcc --version - g++ --version - make --version - cmake --version - ninja --version - - - name: Install extra pip packages - if: ${{ inputs.pip_packages != '' }} - shell: bash - run: | - echo "${{ inputs.pip_packages }}" | while read -r package; do - package="${package#- }" - if [ -n "$(echo "$package" | xargs)" ]; then - echo "::group::Installing pip package: $package" - pip install $package -i 
https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - echo "::endgroup::" - fi - done - - - name: Install extra pip requirements - if: ${{ inputs.pip_requirements != '' }} - shell: bash - run: | - echo "${{ inputs.pip_requirements }}" | while read -r requirement; do - requirement="${requirement#- }" - if [ -n "$(echo "$requirement" | xargs)" ]; then - echo "::group::Installing from requirements: $requirement" - pip install -r $requirement -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - echo "::endgroup::" - fi - done diff --git a/.github/actions/fetch-and-rebase/action.yml b/.github/actions/fetch-and-rebase/action.yml deleted file mode 100644 index fa72a6a..0000000 --- a/.github/actions/fetch-and-rebase/action.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Fetch and Rebase - -description: Fetch and rebase for repository - -inputs: - repo_path: - description: the path the repository locate on - required: true - remote_branch: - description: the branch rebased from - required: true - loop: - description: loop times when fails - required: true - -runs: - using: composite - steps: - - name: Fetch and rebase - env: - REPO_PATH: ${{ inputs.repo_path }} - REMOTE_BRANCH: ${{ inputs.remote_branch }} - LOOP: ${{ inputs.loop }} - shell: bash - run: | - set +e - - COUNT=0 - - for i in $(seq 1 ${LOOP}) - do - pushd ${REPO_PATH} && - git fetch --all && - git rebase ${REMOTE_BRANCH} && - git submodule sync && - git submodule update --init --recursive && - git reset --hard HEAD && - git clean -dfx && - git submodule foreach git reset --hard HEAD && - git submodule foreach git clean -dfx && - popd - - if [[ $? 
-ne 0 ]] - then - let COUNT++ - else - break - fi - done - - [[ ${COUNT} -lt ${LOOP} ]] && true || false diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml new file mode 100644 index 0000000..dfcfc5e --- /dev/null +++ b/.github/workflows/_ascend_npu_build.yml @@ -0,0 +1,98 @@ +name: '_ascend_npu_build' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be used to build' + outputs: + artifact_name: + description: 'The torch_npu distribution artifact name' + value: ${{ jobs.build.outputs.dist_name }} + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + build: + name: build torch_npu + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + outputs: + dist_name: ${{ steps.list-dist.outputs.dist_name }} + steps: + # TODO(shink): Should we add these dependencies to the image? 
+ - name: Install system dependencies + run: | + sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list + apt-get update + apt-get install --no-install-recommends -y git gcc g++ make cmake ninja-build + + - name: Checkout + uses: actions/checkout@v4 + + - name: Checkout torch_npu + uses: actions/checkout@v4 + with: + # TODO(shink): Use Ascend/pytorch once this pr merged: + # https://gitee.com/ascend/pytorch/pulls/12854 + # repository: Ascend/pytorch + repository: shink/torchnpu + ref: feat/autoload + submodules: recursive + path: torch_npu + + - name: Install pip dependencies + working-directory: torch_npu + run: | + pip install wheel + pip install -r requirements.txt + + - name: List Python version + id: list-py-version + working-directory: torch_npu + run: | + py_version=$(python --version | awk '{print $2}' | cut -d '.' -f 1,2) + echo "py_version=${py_version}" >> $GITHUB_OUTPUT + + - name: Build torch_npu + working-directory: torch_npu + run: | + bash ci/build.sh --python=${{ steps.list-py-version.outputs.py_version }} + + - name: List distribution package + id: list-dist + working-directory: torch_npu/dist + run: | + dist_name=$(ls torch_npu*.whl) + dist_path=$(pwd)/${dist_name} + echo "dist_name=${dist_name}" >> $GITHUB_OUTPUT + echo "dist_path=${dist_path}" >> $GITHUB_OUTPUT + + # This artifact is exposed by name via the artifact_name output, so a failed upload must fail the build job. + - name: Upload distribution artifact + id: upload-dist + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.list-dist.outputs.dist_name }} + path: ${{ steps.list-dist.outputs.dist_path }} + if-no-files-found: error + retention-days: 1 + overwrite: true + + - name: Write to workflow job summary + if: ${{ steps.upload-dist.outputs.artifact-url }} + run: | + echo "## torch_npu built successfully! :rocket:" >> $GITHUB_STEP_SUMMARY + echo "You can download the distribution package [here](${{ steps.upload-dist.outputs.artifact-url }})." 
>> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml new file mode 100644 index 0000000..37932ce --- /dev/null +++ b/.github/workflows/_ascend_npu_test.yml @@ -0,0 +1,99 @@ +name: '_ascend_npu_test' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }} + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Install system dependencies + run: | + sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list + apt-get update + apt-get install --no-install-recommends -y git gcc g++ make cmake ninja-build + + - name: Checkout + uses: actions/checkout@v4 + + - name: Checkout torch_npu + uses: actions/checkout@v4 + with: + # TODO(shink): Use Ascend/pytorch once 
this pr merged: + # https://gitee.com/ascend/pytorch/pulls/12854 + # repository: Ascend/pytorch + repository: shink/torchnpu + ref: feat/autoload + submodules: recursive + path: torch_npu + + - name: Download distribution artifact + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: torch_npu + + - name: Install pip dependencies + working-directory: torch_npu + run: | + pip install wheel unittest-xml-reporting + pip install -r requirements.txt + pip install -r test/requirements.txt --no-deps + + # The artifact name is passed through the environment and quoted, so the shell + # receives it as a single argument instead of as expanded script text. + - name: Install torch_npu + working-directory: torch_npu + env: + ARTIFACT_NAME: ${{ inputs.artifact_name }} + run: | + pip install "${ARTIFACT_NAME}" + + # TODO(shink): Skip + - name: Do the test + continue-on-error: true + run: | + python torch_npu/ci/access_control_test.py diff --git a/.github/workflows/_build-and-test.yml b/.github/workflows/_build-and-test.yml deleted file mode 100644 index 3d71a13..0000000 --- a/.github/workflows/_build-and-test.yml +++ /dev/null @@ -1,153 +0,0 @@ -name: 'build-test' - -on: - workflow_call: - inputs: - runner: - required: true - type: string - description: The runner selected to run on - device: - required: true - type: string - description: The device selected to run on - image: - required: true - type: string - description: The docker image which will be loaded - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. 
-defaults: - run: - shell: bash -el {0} - -jobs: - build: - name: build torch_npu - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.image }} - volumes: - - /home/runner/actions-runner/codes:/root/codes - outputs: - dist_name: ${{ steps.list-dist.outputs.dist_name }} - steps: - - name: Prepare the codes - run: | - cp -rf /root/codes /root/build - - - name: Checkout - uses: actions/checkout@v4 - - - name: Install dependencies - uses: ./.github/actions/dependencies-action - with: - pip_packages: | - - wheel - pip_requirements: | - - /root/build/npu/pytorch/requirements.txt - - - name: Build torch_npu - working-directory: /root/build/npu/pytorch - run: | - py_version=$(python --version | awk '{print $2}' | cut -d '.' -f 1,2) - bash ci/build.sh --python=${py_version} - - - name: List distribution package - id: list-dist - working-directory: /root/build/npu/pytorch/dist - run: | - dist_name=$(ls torch_npu*.whl) - dist_path=$(pwd)/${dist_name} - echo "dist_name=${dist_name}" >> $GITHUB_OUTPUT - echo "dist_path=${dist_path}" >> $GITHUB_OUTPUT - - - name: Upload distribution artifact - id: upload-dist - continue-on-error: true - uses: actions/upload-artifact@v4 - with: - name: ${{ steps.list-dist.outputs.dist_name }} - path: ${{ steps.list-dist.outputs.dist_path }} - retention-days: 1 - - - name: Write to workflow job summary - if: ${{ steps.upload-dist.outputs.artifact-url }} - run: | - echo "## torch_npu built successfully! :rocket:" >> $GITHUB_STEP_SUMMARY - echo "You can download the distribution package [here](${{ steps.upload-dist.outputs.artifact-url }})." 
>> $GITHUB_STEP_SUMMARY - - test: - name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }} - runs-on: ${{ inputs.runner }} - needs: - - build - container: - image: ${{ inputs.image }} - volumes: - - /usr/local/dcmi:/usr/local/dcmi - - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes - options: >- - --network host - --device ${{ inputs.device }} - --device /dev/davinci_manager - --device /dev/devmm_svm - --device /dev/hisi_hdc - steps: - - name: Show NPU info - run: | - npu-smi info - - - name: Prepare the codes - run: | - cp -rf /root/codes /root/build - - - name: Download distribution artifact - uses: actions/download-artifact@v4 - with: - name: ${{ needs.build.outputs.dist_name }} - path: /root/build - - - name: Checkout - uses: actions/checkout@v4 - - - name: Install dependencies - uses: ./.github/actions/dependencies-action - with: - pip_packages: | - - wheel - - unittest-xml-reporting - - importlib-metadata - pip_requirements: | - - /root/build/npu/pytorch/requirements.txt - - /root/build/npu/pytorch/test/requirements.txt --no-deps - - - name: Install torch_npu - working-directory: /root/build - run: | - pip install ${{ needs.build.outputs.dist_name }} - - # TODO(shink): Skip - - name: Do the test - continue-on-error: true - working-directory: /root/build - run: | - python npu/pytorch/ci/access_control_test.py - env: - DISABLED_TESTS_FILE: /root/build/npu/pytorch/test/unsupported_test_cases/.pytorch-disabled-tests.json - - - name: Train GPT2 - working-directory: ./test - run: | - pip install -r requirements.txt - pip install accelerate -U - python gpt2_test.py - env: - IS_CI: true - HF_ENDPOINT: https://hf-mirror.com diff --git a/.github/workflows/ascend_npu_test.yml 
b/.github/workflows/ascend_npu_test.yml index 95711c7..f0e226b 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -1,6 +1,3 @@ -# Note: -# Same runner only needs one job named like .*fetch-and-rebase - name: Ascend NPU Test Suite on: @@ -29,6 +26,16 @@ on: - npu-arm64 default: 'self-hosted' description: 'The runner selected to run on' + image: + required: true + type: choice + options: + - ascendai/cann:7.1-openeuler2203sp2 + - ascendai/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.9 + - ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9 + - ascendai/cann:latest + default: 'ascendai/cann:latest' + description: 'The docker image which will be loaded' device: required: true type: choice @@ -43,19 +50,10 @@ on: - /dev/davinci8 default: '/dev/davinci6' description: 'The device selected to run on' - image: - required: true - type: choice - options: - - ascendai/cann:7.1-openeuler2203sp2 - - ascendai/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.9 - - ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9 - default: 'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' - description: 'The docker image which will be loaded' # Only cancel the previous runs when triggered by a pull request concurrency: - group: '${{ github.workflow }}-${{ github.event_name }}' + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }} cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: @@ -64,8 +62,8 @@ jobs: runs-on: ubuntu-latest outputs: runner: ${{ steps.set-param.outputs.runner }} - device: ${{ steps.set-param.outputs.device }} image: ${{ steps.set-param.outputs.image }} + device: ${{ steps.set-param.outputs.device }} steps: - name: Set param id: set-param @@ -73,28 +71,25 @@ jobs: set -e echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT - echo "image=${{ github.event.inputs.image || 
'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' }}" >> $GITHUB_OUTPUT + echo "image=${{ github.event.inputs.image || 'ascendai/cann:latest' }}" >> $GITHUB_OUTPUT - fetch-and-rebase: - name: Fetch and rebase - runs-on: ${{ needs.prepare.outputs.runner }} + build: + name: Build torch_npu needs: - prepare - steps: - - name: Pull latest codes for torch_npu - uses: ./.github/actions/fetch-and-rebase - with: - repo_path: /home/runner/actions-runner/codes/npu/pytorch - remote_branch: upstream/master - loop: 10 + uses: ./.github/workflows/_ascend_npu_build.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} - build-and-test: - name: Build and test + test: + name: Test torch_npu needs: - prepare - - fetch-and-rebase - uses: ./.github/workflows/_build-and-test.yml + - build + uses: ./.github/workflows/_ascend_npu_test.yml with: runner: ${{ needs.prepare.outputs.runner }} - device: ${{ needs.prepare.outputs.device }} image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }}