From afbe5567e2791de0d6167e7f4dd93068331f56b5 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Sat, 12 Oct 2024 14:56:38 +0800 Subject: [PATCH 01/13] update --- .github/workflows/_ascend_npu_build.yml | 83 +++++++++++++ .github/workflows/_ascend_npu_test.yml | 91 ++++++++++++++ .github/workflows/_build-and-test.yml | 153 ------------------------ .github/workflows/ascend_npu_test.yml | 48 +++++++- 4 files changed, 218 insertions(+), 157 deletions(-) create mode 100644 .github/workflows/_ascend_npu_build.yml create mode 100644 .github/workflows/_ascend_npu_test.yml delete mode 100644 .github/workflows/_build-and-test.yml diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml new file mode 100644 index 0000000..74fe691 --- /dev/null +++ b/.github/workflows/_ascend_npu_build.yml @@ -0,0 +1,83 @@ +name: '_ascend_npu_build' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be used to build' + outputs: + artifact_name: + description: 'The result of the called workflow' + value: ${{ steps.list-dist.outputs.dist_name }} + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + build: + name: build torch_npu + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /home/runner/actions-runner/codes:/root/codes + steps: + - name: Prepare the codes + run: | + cp -rf /root/codes /root/build + + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + uses: ./.github/actions/dependencies-action + with: + pip_packages: | + - wheel + pip_requirements: | + - /root/build/npu/pytorch/requirements.txt + + - name: List Python version + id: list-py-version + run: | + py_version=$(python --version | awk '{print $2}' | cut -d '.' -f 1,2) + echo "py_version=${py_version}" >> $GITHUB_OUTPUT + + - name: Build torch_npu + working-directory: /root/build/npu/pytorch + run: | + bash ci/build.sh --python=${{ steps.list-py-version.outputs.py_version }} + + - name: List distribution package + id: list-dist + working-directory: /root/build/npu/pytorch/dist + run: | + dist_name=$(ls torch_npu*.whl) + dist_path=$(pwd)/${dist_name} + echo "dist_name=${dist_name}" >> $GITHUB_OUTPUT + echo "dist_path=${dist_path}" >> $GITHUB_OUTPUT + + - name: Upload distribution artifact + id: upload-dist + continue-on-error: true + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.list-dist.outputs.dist_name }} + path: ${{ steps.list-dist.outputs.dist_path }} + retention-days: 1 + + - name: Write to workflow job summary + if: ${{ steps.upload-dist.outputs.artifact-url }} + run: | + echo "## torch_npu built successfully! :rocket:" >> $GITHUB_STEP_SUMMARY + echo "You can download the distribution package [here](${{ steps.upload-dist.outputs.artifact-url }})." >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml new file mode 100644 index 0000000..a7dcfb5 --- /dev/null +++ b/.github/workflows/_ascend_npu_test.yml @@ -0,0 +1,91 @@ +name: '_ascend_npu_test' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + device: + required: true + type: string + description: 'The device selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }} + runs-on: ${{ inputs.runner }} + needs: + - build + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + - /home/runner/actions-runner/codes:/root/codes + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Prepare the codes + run: | + cp -rf /root/codes /root/build + + - name: Download distribution artifact + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: /root/build + + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + uses: ./.github/actions/dependencies-action + with: + pip_packages: | + - wheel + - unittest-xml-reporting + pip_requirements: | + - /root/build/npu/pytorch/requirements.txt + - /root/build/npu/pytorch/test/requirements.txt --no-deps + + - name: Install torch_npu + working-directory: /root/build + run: | + pip install ${{ inputs.artifact_name }} + + # TODO(shink): Skip + - name: Do the test + continue-on-error: true + working-directory: /root/build + run: | + python npu/pytorch/ci/access_control_test.py + env: + DISABLED_TESTS_FILE: /root/build/npu/pytorch/test/unsupported_test_cases/.pytorch-disabled-tests.json diff --git a/.github/workflows/_build-and-test.yml b/.github/workflows/_build-and-test.yml deleted file mode 100644 index 3d71a13..0000000 --- a/.github/workflows/_build-and-test.yml +++ /dev/null @@ -1,153 +0,0 @@ -name: 'build-test' - -on: - workflow_call: - inputs: - runner: - required: true - type: string - description: The runner selected to run on - device: - required: true - type: string - description: The device selected to run on - image: - required: true - type: string - description: The docker image which will be loaded - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. -defaults: - run: - shell: bash -el {0} - -jobs: - build: - name: build torch_npu - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.image }} - volumes: - - /home/runner/actions-runner/codes:/root/codes - outputs: - dist_name: ${{ steps.list-dist.outputs.dist_name }} - steps: - - name: Prepare the codes - run: | - cp -rf /root/codes /root/build - - - name: Checkout - uses: actions/checkout@v4 - - - name: Install dependencies - uses: ./.github/actions/dependencies-action - with: - pip_packages: | - - wheel - pip_requirements: | - - /root/build/npu/pytorch/requirements.txt - - - name: Build torch_npu - working-directory: /root/build/npu/pytorch - run: | - py_version=$(python --version | awk '{print $2}' | cut -d '.' -f 1,2) - bash ci/build.sh --python=${py_version} - - - name: List distribution package - id: list-dist - working-directory: /root/build/npu/pytorch/dist - run: | - dist_name=$(ls torch_npu*.whl) - dist_path=$(pwd)/${dist_name} - echo "dist_name=${dist_name}" >> $GITHUB_OUTPUT - echo "dist_path=${dist_path}" >> $GITHUB_OUTPUT - - - name: Upload distribution artifact - id: upload-dist - continue-on-error: true - uses: actions/upload-artifact@v4 - with: - name: ${{ steps.list-dist.outputs.dist_name }} - path: ${{ steps.list-dist.outputs.dist_path }} - retention-days: 1 - - - name: Write to workflow job summary - if: ${{ steps.upload-dist.outputs.artifact-url }} - run: | - echo "## torch_npu built successfully! :rocket:" >> $GITHUB_STEP_SUMMARY - echo "You can download the distribution package [here](${{ steps.upload-dist.outputs.artifact-url }})." >> $GITHUB_STEP_SUMMARY - - test: - name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }} - runs-on: ${{ inputs.runner }} - needs: - - build - container: - image: ${{ inputs.image }} - volumes: - - /usr/local/dcmi:/usr/local/dcmi - - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes - options: >- - --network host - --device ${{ inputs.device }} - --device /dev/davinci_manager - --device /dev/devmm_svm - --device /dev/hisi_hdc - steps: - - name: Show NPU info - run: | - npu-smi info - - - name: Prepare the codes - run: | - cp -rf /root/codes /root/build - - - name: Download distribution artifact - uses: actions/download-artifact@v4 - with: - name: ${{ needs.build.outputs.dist_name }} - path: /root/build - - - name: Checkout - uses: actions/checkout@v4 - - - name: Install dependencies - uses: ./.github/actions/dependencies-action - with: - pip_packages: | - - wheel - - unittest-xml-reporting - - importlib-metadata - pip_requirements: | - - /root/build/npu/pytorch/requirements.txt - - /root/build/npu/pytorch/test/requirements.txt --no-deps - - - name: Install torch_npu - working-directory: /root/build - run: | - pip install ${{ needs.build.outputs.dist_name }} - - # TODO(shink): Skip - - name: Do the test - continue-on-error: true - working-directory: /root/build - run: | - python npu/pytorch/ci/access_control_test.py - env: - DISABLED_TESTS_FILE: /root/build/npu/pytorch/test/unsupported_test_cases/.pytorch-disabled-tests.json - - - name: Train GPT2 - working-directory: ./test - run: | - pip install -r requirements.txt - pip install accelerate -U - python gpt2_test.py - env: - IS_CI: true - HF_ENDPOINT: https://hf-mirror.com diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index 95711c7..3691ea7 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -55,7 +55,7 @@ on: # Only cancel the previous runs when triggered by a pull request concurrency: - group: '${{ github.workflow }}-${{ github.event_name }}' + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }} cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: @@ -75,6 +75,7 @@ jobs: echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT echo "image=${{ github.event.inputs.image || 'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' }}" >> $GITHUB_OUTPUT + # TODO(shink): remove this job, use actions/checkout instead fetch-and-rebase: name: Fetch and rebase runs-on: ${{ needs.prepare.outputs.runner }} @@ -88,13 +89,52 @@ jobs: remote_branch: upstream/master loop: 10 - build-and-test: - name: Build and test + build: + name: Build torch_npu needs: - prepare - fetch-and-rebase - uses: ./.github/workflows/_build-and-test.yml + uses: ./.github/workflows/_ascend_npu_build.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} + + test: + name: Test torch_npu + needs: + - prepare + - build + uses: ./.github/workflows/_ascend_npu_test.yml with: runner: ${{ needs.prepare.outputs.runner }} device: ${{ needs.prepare.outputs.device }} image: ${{ needs.prepare.outputs.image }} + artifact_name: ${{ needs.build.outputs.artifact_name }} + +# pytorch-examples: +# name: Run PyTorch examples +# needs: +# - prepare +# - build +# runs-on: ${{ needs.prepare.outputs.runner }} +# +# pytorch-benchmark: +# name: Run PyTorch benchmark +# needs: +# - prepare +# - build +# runs-on: ${{ needs.prepare.outputs.runner }} +# +# transformers-examples: +# name: Run transformers examples +# needs: +# - prepare +# - build +# runs-on: ${{ needs.prepare.outputs.runner }} +# +# transformers-timm: +# name: Run PyTorch examples +# needs: +# - prepare +# - build +# runs-on: ${{ needs.prepare.outputs.runner }} From f6a2db8e1c848ad773e40558e8cd2a0681610b88 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Sat, 12 Oct 2024 15:00:20 +0800 Subject: [PATCH 02/13] update --- .github/workflows/_ascend_npu_build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index 74fe691..37f1ddc 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -14,7 +14,7 @@ on: outputs: artifact_name: description: 'The result of the called workflow' - value: ${{ steps.list-dist.outputs.dist_name }} + value: ${{ jobs.build.outputs.dist_name }} # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly # declared as "shell: bash -el {0}" on steps that need to be properly activated. @@ -31,6 +31,8 @@ jobs: image: ${{ inputs.image }} volumes: - /home/runner/actions-runner/codes:/root/codes + outputs: + dist_name: ${{ steps.list-dist.outputs.dist_name }} steps: - name: Prepare the codes run: | From 6f83cce8f0224fd5f3ffcdf300ce118e524bb16a Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Sat, 12 Oct 2024 15:01:35 +0800 Subject: [PATCH 03/13] update --- .github/workflows/_ascend_npu_build.yml | 2 +- .github/workflows/_ascend_npu_test.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index 37f1ddc..5cf5003 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -13,7 +13,7 @@ on: description: 'The docker image which will be used to build' outputs: artifact_name: - description: 'The result of the called workflow' + description: 'The torch_npu distribution artifact name' value: ${{ jobs.build.outputs.dist_name }} # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml index a7dcfb5..e905f74 100644 --- a/.github/workflows/_ascend_npu_test.yml +++ b/.github/workflows/_ascend_npu_test.yml @@ -31,8 +31,6 @@ jobs: test: name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }} runs-on: ${{ inputs.runner }} - needs: - - build container: image: ${{ inputs.image }} volumes: From f45f49c3ca890d7cac2cef313b1d8fb070ea46f1 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Sat, 12 Oct 2024 15:29:05 +0800 Subject: [PATCH 04/13] 001: all files --- .github/workflows/_ascend_npu_build.yml | 2 + .../_ascend_npu_run_pytorch_benchmark.yml | 58 ++++++++++++ .../_ascend_npu_run_pytorch_examples.yml | 58 ++++++++++++ .../_ascend_npu_run_transformers_examples.yml | 58 ++++++++++++ .../_ascend_npu_run_transformers_timm.yml | 58 ++++++++++++ .github/workflows/_ascend_npu_test.yml | 8 +- .github/workflows/ascend_npu_test.yml | 94 +++++++++++-------- 7 files changed, 295 insertions(+), 41 deletions(-) create mode 100644 .github/workflows/_ascend_npu_run_pytorch_benchmark.yml create mode 100644 .github/workflows/_ascend_npu_run_pytorch_examples.yml create mode 100644 .github/workflows/_ascend_npu_run_transformers_examples.yml create mode 100644 .github/workflows/_ascend_npu_run_transformers_timm.yml diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index 5cf5003..4738c44 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -76,7 +76,9 @@ jobs: with: name: ${{ steps.list-dist.outputs.dist_name }} path: ${{ steps.list-dist.outputs.dist_path }} + if-no-files-found: error retention-days: 1 + overwrite: true - name: Write to workflow job summary if: ${{ steps.upload-dist.outputs.artifact-url }} diff --git a/.github/workflows/_ascend_npu_run_pytorch_benchmark.yml b/.github/workflows/_ascend_npu_run_pytorch_benchmark.yml new file mode 100644 index 0000000..d2e984b --- /dev/null +++ b/.github/workflows/_ascend_npu_run_pytorch_benchmark.yml @@ -0,0 +1,58 @@ +name: '_ascend_npu_pytorch_benchmark' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: run pytorch benchmark in ${{ inputs.image }} with ${{ inputs.device }} + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + - /home/runner/actions-runner/codes:/root/codes + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Download distribution artifact + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: /root/build diff --git a/.github/workflows/_ascend_npu_run_pytorch_examples.yml b/.github/workflows/_ascend_npu_run_pytorch_examples.yml new file mode 100644 index 0000000..45deb00 --- /dev/null +++ b/.github/workflows/_ascend_npu_run_pytorch_examples.yml @@ -0,0 +1,58 @@ +name: '_ascend_npu_pytorch_examples' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: run pytorch examples in ${{ inputs.image }} with ${{ inputs.device }} + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + - /home/runner/actions-runner/codes:/root/codes + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Download distribution artifact + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: /root/build diff --git a/.github/workflows/_ascend_npu_run_transformers_examples.yml b/.github/workflows/_ascend_npu_run_transformers_examples.yml new file mode 100644 index 0000000..a9d5f8c --- /dev/null +++ b/.github/workflows/_ascend_npu_run_transformers_examples.yml @@ -0,0 +1,58 @@ +name: '_ascend_npu_transformers_examples' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: run transformers examples in ${{ inputs.image }} with ${{ inputs.device }} + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + - /home/runner/actions-runner/codes:/root/codes + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Download distribution artifact + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: /root/build diff --git a/.github/workflows/_ascend_npu_run_transformers_timm.yml b/.github/workflows/_ascend_npu_run_transformers_timm.yml new file mode 100644 index 0000000..cdb084f --- /dev/null +++ b/.github/workflows/_ascend_npu_run_transformers_timm.yml @@ -0,0 +1,58 @@ +name: '_ascend_npu_transformers_timm' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: run transformers timm in ${{ inputs.image }} with ${{ inputs.device }} + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + - /home/runner/actions-runner/codes:/root/codes + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Download distribution artifact + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: /root/build diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml index e905f74..06fbcdb 100644 --- a/.github/workflows/_ascend_npu_test.yml +++ b/.github/workflows/_ascend_npu_test.yml @@ -7,14 +7,14 @@ on: required: true type: string description: 'The runner selected to run on' - device: - required: true - type: string - description: 'The device selected to run on' image: required: true type: string description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' artifact_name: required: true type: string diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index 3691ea7..c5c35f1 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -29,6 +29,15 @@ on: - npu-arm64 default: 'self-hosted' description: 'The runner selected to run on' + image: + required: true + type: choice + options: + - ascendai/cann:7.1-openeuler2203sp2 + - ascendai/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.9 + - ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9 + default: 'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' + description: 'The docker image which will be loaded' device: required: true type: choice @@ -43,15 +52,6 @@ on: - /dev/davinci8 default: '/dev/davinci6' description: 'The device selected to run on' - image: - required: true - type: choice - options: - - ascendai/cann:7.1-openeuler2203sp2 - - ascendai/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.9 - - ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9 - default: 'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' - description: 'The docker image which will be loaded' # Only cancel the previous runs when triggered by a pull request concurrency: @@ -64,8 +64,8 @@ jobs: runs-on: ubuntu-latest outputs: runner: ${{ steps.set-param.outputs.runner }} - device: ${{ steps.set-param.outputs.device }} image: ${{ steps.set-param.outputs.image }} + device: ${{ steps.set-param.outputs.device }} steps: - name: Set param id: set-param @@ -107,34 +107,54 @@ jobs: uses: ./.github/workflows/_ascend_npu_test.yml with: runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }} + + pytorch-examples: + name: Run PyTorch examples + needs: + - prepare + - test + uses: ./.github/workflows/_ascend_npu_run_pytorch_examples.yml + with: + runner: ${{ needs.prepare.outputs.runner }} image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} artifact_name: ${{ needs.build.outputs.artifact_name }} -# pytorch-examples: -# name: Run PyTorch examples -# needs: -# - prepare -# - build -# runs-on: ${{ needs.prepare.outputs.runner }} -# -# pytorch-benchmark: -# name: Run PyTorch benchmark -# needs: -# - prepare -# - build -# runs-on: ${{ needs.prepare.outputs.runner }} -# -# transformers-examples: -# name: Run transformers examples -# needs: -# - prepare -# - build -# runs-on: ${{ needs.prepare.outputs.runner }} -# -# transformers-timm: -# name: Run PyTorch examples -# needs: -# - prepare -# - build -# runs-on: ${{ needs.prepare.outputs.runner }} + pytorch-benchmark: + name: Run PyTorch benchmark + needs: + - prepare + - pytorch-examples + uses: ./.github/workflows/_ascend_npu_run_pytorch_benchmark.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }} + + transformers-examples: + name: Run transformers examples + needs: + - prepare + - pytorch-benchmark + uses: ./.github/workflows/_ascend_npu_run_transformers_examples.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }} + + transformers-timm: + name: Run PyTorch image models(timm) + needs: + - prepare + - transformers-examples + uses: ./.github/workflows/_ascend_npu_run_transformers_timm.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }} From 797ff131c4847a504380c11f1558f5d3e8f27b7b Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Thu, 31 Oct 2024 09:41:47 +0800 Subject: [PATCH 05/13] revert --- ...nchmark.yml => _ascend_npu_run_models.yml} | 0 .../_ascend_npu_run_pytorch_examples.yml | 58 ------------------- .../_ascend_npu_run_transformers_examples.yml | 58 ------------------- .../_ascend_npu_run_transformers_timm.yml | 58 ------------------- .github/workflows/ascend_npu_test.yml | 38 +----------- 5 files changed, 1 insertion(+), 211 deletions(-) rename .github/workflows/{_ascend_npu_run_pytorch_benchmark.yml => _ascend_npu_run_models.yml} (100%) delete mode 100644 .github/workflows/_ascend_npu_run_pytorch_examples.yml delete mode 100644 .github/workflows/_ascend_npu_run_transformers_examples.yml delete mode 100644 .github/workflows/_ascend_npu_run_transformers_timm.yml diff --git a/.github/workflows/_ascend_npu_run_pytorch_benchmark.yml b/.github/workflows/_ascend_npu_run_models.yml similarity index 100% rename from .github/workflows/_ascend_npu_run_pytorch_benchmark.yml rename to .github/workflows/_ascend_npu_run_models.yml diff --git a/.github/workflows/_ascend_npu_run_pytorch_examples.yml b/.github/workflows/_ascend_npu_run_pytorch_examples.yml deleted file mode 100644 index 45deb00..0000000 --- a/.github/workflows/_ascend_npu_run_pytorch_examples.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: '_ascend_npu_pytorch_examples' - -on: - workflow_call: - inputs: - runner: - required: true - type: string - description: 'The runner selected to run on' - image: - required: true - type: string - description: 'The docker image which will be loaded' - device: - required: true - type: string - description: 'The device selected to run on' - artifact_name: - required: true - type: string - description: 'The torch_npu distribution artifact name' - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. -defaults: - run: - shell: bash -el {0} - -jobs: - test: - name: run pytorch examples in ${{ inputs.image }} with ${{ inputs.device }} - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.image }} - volumes: - - /usr/local/dcmi:/usr/local/dcmi - - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes - options: >- - --network host - --device ${{ inputs.device }} - --device /dev/davinci_manager - --device /dev/devmm_svm - --device /dev/hisi_hdc - steps: - - name: Show NPU info - run: | - npu-smi info - - - name: Download distribution artifact - uses: actions/download-artifact@v4 - with: - name: ${{ inputs.artifact_name }} - path: /root/build diff --git a/.github/workflows/_ascend_npu_run_transformers_examples.yml b/.github/workflows/_ascend_npu_run_transformers_examples.yml deleted file mode 100644 index a9d5f8c..0000000 --- a/.github/workflows/_ascend_npu_run_transformers_examples.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: '_ascend_npu_transformers_examples' - -on: - workflow_call: - inputs: - runner: - required: true - type: string - description: 'The runner selected to run on' - image: - required: true - type: string - description: 'The docker image which will be loaded' - device: - required: true - type: string - description: 'The device selected to run on' - artifact_name: - required: true - type: string - description: 'The torch_npu distribution artifact name' - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. -defaults: - run: - shell: bash -el {0} - -jobs: - test: - name: run transformers examples in ${{ inputs.image }} with ${{ inputs.device }} - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.image }} - volumes: - - /usr/local/dcmi:/usr/local/dcmi - - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes - options: >- - --network host - --device ${{ inputs.device }} - --device /dev/davinci_manager - --device /dev/devmm_svm - --device /dev/hisi_hdc - steps: - - name: Show NPU info - run: | - npu-smi info - - - name: Download distribution artifact - uses: actions/download-artifact@v4 - with: - name: ${{ inputs.artifact_name }} - path: /root/build diff --git a/.github/workflows/_ascend_npu_run_transformers_timm.yml b/.github/workflows/_ascend_npu_run_transformers_timm.yml deleted file mode 100644 index cdb084f..0000000 --- a/.github/workflows/_ascend_npu_run_transformers_timm.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: '_ascend_npu_transformers_timm' - -on: - workflow_call: - inputs: - runner: - required: true - type: string - description: 'The runner selected to run on' - image: - required: true - type: string - description: 'The docker image which will be loaded' - device: - required: true - type: string - description: 'The device selected to run on' - artifact_name: - required: true - type: string - description: 'The torch_npu distribution artifact name' - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. -defaults: - run: - shell: bash -el {0} - -jobs: - test: - name: run transformers timm in ${{ inputs.image }} with ${{ inputs.device }} - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.image }} - volumes: - - /usr/local/dcmi:/usr/local/dcmi - - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes - options: >- - --network host - --device ${{ inputs.device }} - --device /dev/davinci_manager - --device /dev/devmm_svm - --device /dev/hisi_hdc - steps: - - name: Show NPU info - run: | - npu-smi info - - - name: Download distribution artifact - uses: actions/download-artifact@v4 - with: - name: ${{ inputs.artifact_name }} - path: /root/build diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index c5c35f1..fb30153 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -112,7 +112,7 @@ jobs: artifact_name: ${{ needs.build.outputs.artifact_name }} pytorch-examples: - name: Run PyTorch examples + name: Run models needs: - prepare - test @@ -122,39 +122,3 @@ jobs: image: ${{ needs.prepare.outputs.image }} device: ${{ needs.prepare.outputs.device }} artifact_name: ${{ needs.build.outputs.artifact_name }} - - pytorch-benchmark: - name: Run PyTorch benchmark - needs: - - prepare - - pytorch-examples - uses: ./.github/workflows/_ascend_npu_run_pytorch_benchmark.yml - with: - runner: ${{ needs.prepare.outputs.runner }} - image: ${{ needs.prepare.outputs.image }} - device: ${{ needs.prepare.outputs.device }} - artifact_name: ${{ needs.build.outputs.artifact_name }} - - transformers-examples: - name: Run transformers examples - needs: - - prepare - - pytorch-benchmark - uses: ./.github/workflows/_ascend_npu_run_transformers_examples.yml - with: - runner: ${{ needs.prepare.outputs.runner }} - image: ${{ needs.prepare.outputs.image }} - device: ${{ needs.prepare.outputs.device }} - artifact_name: ${{ needs.build.outputs.artifact_name }} - - transformers-timm: - name: Run PyTorch image models(timm) - needs: - - prepare - - transformers-examples - uses: ./.github/workflows/_ascend_npu_run_transformers_timm.yml - with: - runner: ${{ needs.prepare.outputs.runner }} - image: ${{ needs.prepare.outputs.image }} - device: ${{ needs.prepare.outputs.device }} - artifact_name: ${{ needs.build.outputs.artifact_name }} From 3e0c2c60bff4e2a78cc6660b12f0b437bc9b0ab0 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:12:07 +0800 Subject: [PATCH 06/13] update --- .../actions/dependencies-action/action.yml | 11 +-- .github/workflows/_ascend_npu_build.yml | 32 +++++---- .github/workflows/ascend_npu_test.yml | 69 +++++++------------ 3 files changed, 47 insertions(+), 65 deletions(-) diff --git a/.github/actions/dependencies-action/action.yml b/.github/actions/dependencies-action/action.yml index 051472c..080b5b6 100644 --- a/.github/actions/dependencies-action/action.yml +++ b/.github/actions/dependencies-action/action.yml @@ -29,7 +29,6 @@ inputs: runs: using: composite steps: - # TODO(shink): Remove this step after building devel images - name: Install system dependencies shell: bash env: @@ -40,19 +39,13 @@ runs: apt update apt install --no-install-recommends -y \ git \ - gcc-10 \ - g++-10 \ + gcc \ + g++ \ make \ cmake \ ninja-build echo "::endgroup::" - - name: Create symlinks for gcc and g++ - shell: bash - run: | - ln -s /usr/bin/gcc-10 /usr/bin/gcc - ln -s /usr/bin/g++-10 /usr/bin/g++ - - name: Show versions shell: bash run: | diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index 4738c44..849166b 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -29,40 +29,46 @@ jobs: runs-on: ${{ inputs.runner }} container: image: ${{ inputs.image }} - volumes: - - /home/runner/actions-runner/codes:/root/codes outputs: dist_name: ${{ steps.list-dist.outputs.dist_name }} steps: - - name: Prepare the codes - run: | - cp -rf /root/codes /root/build + - name: Install system dependencies + uses: ./.github/actions/dependencies-action - name: Checkout uses: actions/checkout@v4 - - name: Install dependencies - uses: ./.github/actions/dependencies-action + - name: Checkout torch_npu + uses: actions/checkout@v4 with: - pip_packages: | - - wheel - pip_requirements: | - - /root/build/npu/pytorch/requirements.txt + # TODO(shink): Use ascend/pytorch once this pr merged: + # https://gitee.com/ascend/pytorch/pulls/12854 + repo: shink/torchnpu + ref: feat/autoload + submodules: recursive + path: torch_npu + + - name: Install pip dependencies + working-directory: torch_npu + run: | + pip install wheel + pip install -r requirements.txt - name: List Python version id: list-py-version + working-directory: torch_npu run: | py_version=$(python --version | awk '{print $2}' | cut -d '.' -f 1,2) echo "py_version=${py_version}" >> $GITHUB_OUTPUT - name: Build torch_npu - working-directory: /root/build/npu/pytorch + working-directory: torch_npu run: | bash ci/build.sh --python=${{ steps.list-py-version.outputs.py_version }} - name: List distribution package id: list-dist - working-directory: /root/build/npu/pytorch/dist + working-directory: torch_npu/dist run: | dist_name=$(ls torch_npu*.whl) dist_path=$(pwd)/${dist_name} diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index fb30153..8a0fc1d 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -1,6 +1,3 @@ -# Note: -# Same runner only needs one job named like .*fetch-and-rebase - name: Ascend NPU Test Suite on: @@ -36,7 +33,8 @@ on: - ascendai/cann:7.1-openeuler2203sp2 - ascendai/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.9 - ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9 - default: 'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' + - latest + default: 'latest' description: 'The docker image which will be loaded' device: required: true @@ -73,52 +71,37 @@ jobs: set -e echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT - echo "image=${{ github.event.inputs.image || 'ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9' }}" >> $GITHUB_OUTPUT - - # TODO(shink): remove this job, use actions/checkout instead - fetch-and-rebase: - name: Fetch and rebase - runs-on: ${{ needs.prepare.outputs.runner }} - needs: - - prepare - steps: - - name: Pull latest codes for torch_npu - uses: ./.github/actions/fetch-and-rebase - with: - repo_path: /home/runner/actions-runner/codes/npu/pytorch - remote_branch: upstream/master - loop: 10 + echo "image=${{ github.event.inputs.image || 'latest' }}" >> $GITHUB_OUTPUT build: name: Build torch_npu needs: - prepare - - fetch-and-rebase uses: ./.github/workflows/_ascend_npu_build.yml with: runner: ${{ needs.prepare.outputs.runner }} image: ${{ needs.prepare.outputs.image }} - test: - name: Test torch_npu - needs: - - prepare - - build - uses: ./.github/workflows/_ascend_npu_test.yml - with: - runner: ${{ needs.prepare.outputs.runner }} - image: ${{ needs.prepare.outputs.image }} - device: ${{ needs.prepare.outputs.device }} - artifact_name: ${{ needs.build.outputs.artifact_name }} - - pytorch-examples: - name: Run models - needs: - - prepare - - test - uses: ./.github/workflows/_ascend_npu_run_pytorch_examples.yml - with: - runner: ${{ needs.prepare.outputs.runner }} - image: ${{ needs.prepare.outputs.image }} - device: ${{ needs.prepare.outputs.device }} - artifact_name: ${{ needs.build.outputs.artifact_name }} +# test: +# name: Test torch_npu +# needs: +# - prepare +# - build +# uses: ./.github/workflows/_ascend_npu_test.yml +# with: +# runner: ${{ needs.prepare.outputs.runner }} +# image: ${{ needs.prepare.outputs.image }} +# device: ${{ needs.prepare.outputs.device }} +# artifact_name: ${{ needs.build.outputs.artifact_name }} +# +# pytorch-examples: +# name: Run models +# needs: +# - prepare +# - test +# uses: ./.github/workflows/_ascend_npu_run_pytorch_examples.yml +# with: +# runner: ${{ needs.prepare.outputs.runner }} +# image: ${{ needs.prepare.outputs.image }} +# device: ${{ needs.prepare.outputs.device }} +# artifact_name: ${{ needs.build.outputs.artifact_name }} From 0f4c33d6ab32ecc6b0ca7d538da43e63672df398 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:16:19 +0800 Subject: [PATCH 07/13] update --- .github/workflows/ascend_npu_test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index 8a0fc1d..8eb74df 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -33,8 +33,8 @@ on: - ascendai/cann:7.1-openeuler2203sp2 - ascendai/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.9 - ascendai/cann:8.0.rc3.alpha002-910b-ubuntu22.04-py3.9 - - latest - default: 'latest' + - ascendai/cann:latest + default: 'ascendai/cann:latest' description: 'The docker image which will be loaded' device: required: true @@ -71,7 +71,7 @@ jobs: set -e echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT - echo "image=${{ github.event.inputs.image || 'latest' }}" >> $GITHUB_OUTPUT + echo "image=${{ github.event.inputs.image || 'ascendai/cann:latest' }}" >> $GITHUB_OUTPUT build: name: Build torch_npu From ac7afea90c107d383e64cae3f269bba8c6488d54 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:21:33 +0800 Subject: [PATCH 08/13] update --- .github/workflows/_ascend_npu_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index 849166b..ebb2320 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -43,7 +43,7 @@ jobs: with: # TODO(shink): Use ascend/pytorch once this pr merged: # https://gitee.com/ascend/pytorch/pulls/12854 - repo: shink/torchnpu + repository: shink/torchnpu ref: feat/autoload submodules: recursive path: torch_npu From c2de9a1bfb5ff1c0bc847210daca70b4ccf8c394 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:24:55 +0800 Subject: [PATCH 09/13] update --- .../actions/dependencies-action/action.yml | 85 ------------------- .github/workflows/_ascend_npu_build.yml | 12 ++- 2 files changed, 11 insertions(+), 86 deletions(-) delete mode 100644 .github/actions/dependencies-action/action.yml diff --git a/.github/actions/dependencies-action/action.yml b/.github/actions/dependencies-action/action.yml deleted file mode 100644 index 080b5b6..0000000 --- a/.github/actions/dependencies-action/action.yml +++ /dev/null @@ -1,85 +0,0 @@ -# An action for installing dependencies. You can set -# custom pip dependencies as following: -# -# Usage: -# -# - name: Install dependencies -# uses: ./.github/actions/dependencies-action -# with: -# pip_packages: | -# - torch -# - triton -# pip_requirements: | -# - /path/a/requirements.txt -# - /path/b/requirements.txt --no-deps - -name: Install dependencies -description: Install OS and pip dependencies - -inputs: - pip_packages: - description: List of pip packages to install - required: false - default: '' - pip_requirements: - description: List of requirements files to install - required: false - default: '' - -runs: - using: composite - steps: - - name: Install system dependencies - shell: bash - env: - DEBIAN_FRONTEND: noninteractive - run: | - echo "::group::Install system dependencies" - sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list - apt update - apt install --no-install-recommends -y \ - git \ - gcc \ - g++ \ - make \ - cmake \ - ninja-build - echo "::endgroup::" - - - name: Show versions - shell: bash - run: | - set -x - python --version - pip --version - gcc --version - g++ --version - make --version - cmake --version - ninja --version - - - name: Install extra pip packages - if: ${{ inputs.pip_packages != '' }} - shell: bash - run: | - echo "${{ inputs.pip_packages }}" | while read -r package; do - package="${package#- }" - if [ -n "$(echo "$package" | xargs)" ]; then - echo "::group::Installing pip package: $package" - pip install $package -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - echo "::endgroup::" - fi - done - - - name: Install extra pip requirements - if: ${{ inputs.pip_requirements != '' }} - shell: bash - run: | - echo "${{ inputs.pip_requirements }}" | while read -r requirement; do - requirement="${requirement#- }" - if [ -n "$(echo "$requirement" | xargs)" ]; then - echo "::group::Installing from requirements: $requirement" - pip install -r $requirement -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - echo "::endgroup::" - fi - done diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index ebb2320..3ee0667 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -32,8 +32,18 @@ jobs: outputs: dist_name: ${{ steps.list-dist.outputs.dist_name }} steps: + # TODO(shink): Remove this step when devel images available - name: Install system dependencies - uses: ./.github/actions/dependencies-action + run: | + sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list + apt update + apt install --no-install-recommends -y \ + git \ + gcc \ + g++ \ + make \ + cmake \ + ninja-build - name: Checkout uses: actions/checkout@v4 From ec69cde3df3ae32e0a9ccac753768dcffbae4c97 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:38:26 +0800 Subject: [PATCH 10/13] update --- .github/workflows/_ascend_npu_build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index 3ee0667..b8e7ad7 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -42,8 +42,7 @@ jobs: gcc \ g++ \ make \ - cmake \ - ninja-build + cmake - name: Checkout uses: actions/checkout@v4 @@ -51,8 +50,9 @@ jobs: - name: Checkout torch_npu uses: actions/checkout@v4 with: - # TODO(shink): Use ascend/pytorch once this pr merged: + # TODO(shink): Use Ascend/pytorch once this pr merged: # https://gitee.com/ascend/pytorch/pulls/12854 + # repository: Ascend/pytorch repository: shink/torchnpu ref: feat/autoload submodules: recursive From d1641495edf37226e020ff16e5d3892d7501925c Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:46:50 +0800 Subject: [PATCH 11/13] update --- .github/workflows/_ascend_npu_build.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index b8e7ad7..dfcfc5e 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -32,17 +32,12 @@ jobs: outputs: dist_name: ${{ steps.list-dist.outputs.dist_name }} steps: - # TODO(shink): Remove this step when devel images available + # TODO(shink): Should we add these dependencies to the image? - name: Install system dependencies run: | sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list apt update - apt install --no-install-recommends -y \ - git \ - gcc \ - g++ \ - make \ - cmake + apt install --no-install-recommends -y git gcc g++ make cmake ninja-build - name: Checkout uses: actions/checkout@v4 From 76438e1f67e631ff8b4514c8f89c8c1f5a1ff515 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 6 Nov 2024 17:57:53 +0800 Subject: [PATCH 12/13] update --- .github/workflows/_ascend_npu_test.yml | 48 +++++++++++++++----------- .github/workflows/ascend_npu_test.yml | 24 ++++++------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml index 06fbcdb..37932ce 100644 --- a/.github/workflows/_ascend_npu_test.yml +++ b/.github/workflows/_ascend_npu_test.yml @@ -39,7 +39,6 @@ jobs: - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes options: >- --network host --device ${{ inputs.device }} @@ -51,39 +50,46 @@ jobs: run: | npu-smi info - - name: Prepare the codes + - name: Install system dependencies run: | - cp -rf /root/codes /root/build + sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list + apt update + apt install --no-install-recommends -y git gcc g++ make cmake ninja-build + + - name: Checkout + uses: actions/checkout@v4 + + - name: Checkout torch_npu + uses: actions/checkout@v4 + with: + # TODO(shink): Use Ascend/pytorch once this pr merged: + # https://gitee.com/ascend/pytorch/pulls/12854 + # repository: Ascend/pytorch + repository: shink/torchnpu + ref: feat/autoload + submodules: recursive + path: torch_npu - name: Download distribution artifact uses: actions/download-artifact@v4 with: name: ${{ inputs.artifact_name }} - path: /root/build + path: torch_npu - - name: Checkout - uses: actions/checkout@v4 - - - name: Install dependencies - uses: ./.github/actions/dependencies-action - with: - pip_packages: | - - wheel - - unittest-xml-reporting - pip_requirements: | - - /root/build/npu/pytorch/requirements.txt - - /root/build/npu/pytorch/test/requirements.txt --no-deps + - name: Install pip dependencies + working-directory: torch_npu + run: | + pip install wheel unittest-xml-reporting + pip install -r requirements.txt + pip install -r test/requirements.txt --no-deps - name: Install torch_npu - working-directory: /root/build + working-directory: torch_npu run: | pip install ${{ inputs.artifact_name }} # TODO(shink): Skip - name: Do the test continue-on-error: true - working-directory: /root/build run: | - python npu/pytorch/ci/access_control_test.py - env: - DISABLED_TESTS_FILE: /root/build/npu/pytorch/test/unsupported_test_cases/.pytorch-disabled-tests.json + python torch_npu/ci/access_control_test.py diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index 8eb74df..bfeae33 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -82,18 +82,18 @@ jobs: runner: ${{ needs.prepare.outputs.runner }} image: ${{ needs.prepare.outputs.image }} -# test: -# name: Test torch_npu -# needs: -# - prepare -# - build -# uses: ./.github/workflows/_ascend_npu_test.yml -# with: -# runner: ${{ needs.prepare.outputs.runner }} -# image: ${{ needs.prepare.outputs.image }} -# device: ${{ needs.prepare.outputs.device }} -# artifact_name: ${{ needs.build.outputs.artifact_name }} -# + test: + name: Test torch_npu + needs: + - prepare + - build + uses: ./.github/workflows/_ascend_npu_test.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }} + # pytorch-examples: # name: Run models # needs: From 22553342cfbcaf95ad4bf62aaa1e23c1a87c0b34 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 8 Nov 2024 09:48:40 +0800 Subject: [PATCH 13/13] restore --- .github/actions/fetch-and-rebase/action.yml | 51 ----------------- .github/workflows/_ascend_npu_run_models.yml | 58 -------------------- .github/workflows/ascend_npu_test.yml | 12 ---- 3 files changed, 121 deletions(-) delete mode 100644 .github/actions/fetch-and-rebase/action.yml delete mode 100644 .github/workflows/_ascend_npu_run_models.yml diff --git a/.github/actions/fetch-and-rebase/action.yml b/.github/actions/fetch-and-rebase/action.yml deleted file mode 100644 index fa72a6a..0000000 --- a/.github/actions/fetch-and-rebase/action.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Fetch and Rebase - -description: Fetch and rebase for repository - -inputs: - repo_path: - description: the path the repository locate on - required: true - remote_branch: - description: the branch rebased from - required: true - loop: - description: loop times when fails - required: true - -runs: - using: composite - steps: - - name: Fetch and rebase - env: - REPO_PATH: ${{ inputs.repo_path }} - REMOTE_BRANCH: ${{ inputs.remote_branch }} - LOOP: ${{ inputs.loop }} - shell: bash - run: | - set +e - - COUNT=0 - - for i in $(seq 1 ${LOOP}) - do - pushd ${REPO_PATH} && - git fetch --all && - git rebase ${REMOTE_BRANCH} && - git submodule sync && - git submodule update --init --recursive && - git reset --hard HEAD && - git clean -dfx && - git submodule foreach git reset --hard HEAD && - git submodule foreach git clean -dfx && - popd - - if [[ $? -ne 0 ]] - then - let COUNT++ - else - break - fi - done - - [[ ${COUNT} -lt ${LOOP} ]] && true || false diff --git a/.github/workflows/_ascend_npu_run_models.yml b/.github/workflows/_ascend_npu_run_models.yml deleted file mode 100644 index d2e984b..0000000 --- a/.github/workflows/_ascend_npu_run_models.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: '_ascend_npu_pytorch_benchmark' - -on: - workflow_call: - inputs: - runner: - required: true - type: string - description: 'The runner selected to run on' - image: - required: true - type: string - description: 'The docker image which will be loaded' - device: - required: true - type: string - description: 'The device selected to run on' - artifact_name: - required: true - type: string - description: 'The torch_npu distribution artifact name' - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. -defaults: - run: - shell: bash -el {0} - -jobs: - test: - name: run pytorch benchmark in ${{ inputs.image }} with ${{ inputs.device }} - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.image }} - volumes: - - /usr/local/dcmi:/usr/local/dcmi - - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info - - /etc/ascend_install.info:/etc/ascend_install.info - - /home/runner/actions-runner/codes:/root/codes - options: >- - --network host - --device ${{ inputs.device }} - --device /dev/davinci_manager - --device /dev/devmm_svm - --device /dev/hisi_hdc - steps: - - name: Show NPU info - run: | - npu-smi info - - - name: Download distribution artifact - uses: actions/download-artifact@v4 - with: - name: ${{ inputs.artifact_name }} - path: /root/build diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index bfeae33..f0e226b 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -93,15 +93,3 @@ jobs: image: ${{ needs.prepare.outputs.image }} device: ${{ needs.prepare.outputs.device }} artifact_name: ${{ needs.build.outputs.artifact_name }} - -# pytorch-examples: -# name: Run models -# needs: -# - prepare -# - test -# uses: ./.github/workflows/_ascend_npu_run_pytorch_examples.yml -# with: -# runner: ${{ needs.prepare.outputs.runner }} -# image: ${{ needs.prepare.outputs.image }} -# device: ${{ needs.prepare.outputs.device }} -# artifact_name: ${{ needs.build.outputs.artifact_name }}