diff --git a/.github/actions/install-artifact/action.yml b/.github/actions/install-artifact/action.yml
new file mode 100644
index 0000000..864d8ea
--- /dev/null
+++ b/.github/actions/install-artifact/action.yml
@@ -0,0 +1,21 @@
+name: "Install artifact with pip"
+description: "Install artifact with pip"
+inputs:
+  artifact:
+    description: "The distribution artifact name"
+    type: string
+    required: true
+
+# TODO: https://github.com/actions/runner/issues/3620
+runs:
+  using: "composite"
+  steps:
+    - name: Download artifact
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ inputs.artifact }}
+
+    - name: Install artifact
+      shell: bash
+      run: |
+        pip install ${{ inputs.artifact }}
diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml
index 67c46b0..5989a4b 100644
--- a/.github/workflows/_ascend_npu_benchmark.yml
+++ b/.github/workflows/_ascend_npu_benchmark.yml
@@ -15,10 +15,14 @@ on:
         required: true
         type: string
         description: 'The device selected to run on'
-      artifact_name:
+      torch-artifact:
+        required: false
+        type: string
+        description: 'The distribution artifact name of torch'
+      torch-npu-artifact:
         required: true
         type: string
-        description: 'The torch_npu distribution artifact name'
+        description: 'The distribution artifact name of torch_npu'
     secrets:
       pr-token:
         description: 'A token used to create a pull request'
@@ -87,29 +91,62 @@ jobs:
           repository: pytorch/benchmark
           path: benchmark
 
-      - name: Download ${{ inputs.artifact_name }}
+      # TODO
+      # - name: Install torch
+      #   id: install-torch
+      #   uses: ./.github/actions/install-artifact
+      #   with:
+      #     artifact: ${{ inputs.torch-artifact }}
+
+      - name: Download torch artifact
+        if: ${{ inputs.torch-artifact }}
         uses: actions/download-artifact@v4
         with:
-          name: ${{ inputs.artifact_name }}
-          path: ascend_npu
+          name: ${{ inputs.torch-artifact }}
 
-      - name: Install torch_npu
-        working-directory: ascend_npu
+      - name: Install torch
+        if: ${{ inputs.torch-artifact }}
         run: |
-          pip install ${{ inputs.artifact_name }}
+          pip install ${{ inputs.torch-artifact }}
 
       - name: Install torch_npu dependencies
+        if: ${{ !inputs.torch-artifact }}
+        run: |
+          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt
+
+      - name: List torch version
+        id: list-torch-version
+        shell: bash
+        run: |
+          torch_version=$(python -c "import torch; print(torch.__version__)")
+          echo "torch-version=${torch_version}" >> $GITHUB_OUTPUT
+
+      - name: Download torch_npu artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.torch-npu-artifact }}
+          path: ascend_npu
+
+      - name: Install torch_npu
         working-directory: ascend_npu
         run: |
-          curl -fsSL -O https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt
-          pip install -r requirements.txt
+          pip install ${{ inputs.torch-npu-artifact }}
 
-      # TODO: We must use numpy 1.x
       - name: Install benchmark dependencies
         run: |
-          pip install -r benchmark/requirements.txt --constraint ascend_npu/requirements.txt "numpy==1.*"
+          # Keep the installed torch version and stay on numpy 1.x; the quotes stop bash from glob-expanding the spec
+          pip install -r benchmark/requirements.txt \
+            torch==${{ steps.list-torch-version.outputs.torch-version }} \
+            "numpy==1.*"
+
+      - name: Install dependencies for all the models
+        run: |
           python benchmark/install.py --userbenchmark test_bench --continue_on_fail
 
+      - name: Install nightly torchvision and torchaudio
+        run: |
+          pip install --pre torchvision torchaudio --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu
+
       - name: Install project dependencies
         run: |
           pip install -r requirements.txt
@@ -128,7 +165,7 @@ jobs:
           python run_benchmark.py test_bench --accuracy --device npu --test eval \
             --output ascend_npu_benchmark.json
 
-      - name: Upload the output file
+      - name: Upload the benchmark report file
         id: upload-output
         uses: actions/upload-artifact@v4
         with:
@@ -136,6 +173,7 @@ jobs:
           path: benchmark/ascend_npu_benchmark.json
           if-no-files-found: error
           retention-days: 1
+          overwrite: true
 
       - name: Write to workflow job summary
         run: |
diff --git a/.github/workflows/_ascend_npu_build_torch.yml b/.github/workflows/_ascend_npu_build_torch.yml
new file mode 100644
index 0000000..53e3975
--- /dev/null
+++ b/.github/workflows/_ascend_npu_build_torch.yml
@@ -0,0 +1,109 @@
+name: '_ascend_npu_build_torch'
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        required: true
+        type: string
+        description: 'The runner selected to run on'
+      image:
+        required: true
+        type: string
+        description: 'The docker image which will be used to build'
+      ref:
+        required: false
+        type: string
+        default: 'refs/heads/main'
+        description: 'The branch, tag or SHA to checkout'
+    outputs:
+      torch-artifact:
+        description: 'The distribution artifact name of torch'
+        value: ${{ jobs.build.outputs.dist-name }}
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  build:
+    name: build torch
+    runs-on: ${{ inputs.runner }}
+    container:
+      image: ${{ inputs.image }}
+      options: >-
+        --network host
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
+    outputs:
+      dist-name: ${{ steps.list-dist.outputs.dist-name }}
+    steps:
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt-get update
+          apt-get install -y git gcc g++ make cmake ninja-build
+
+      # See: https://github.com/actions/checkout/issues/363#issuecomment-1915075699
+      - name: Config git
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+
+      - name: Checkout PyTorch
+        uses: actions/checkout@v4
+        with:
+          repository: pytorch/pytorch
+          ref: ${{ inputs.ref }}
+          submodules: recursive
+          path: pytorch
+
+      - name: View commit history
+        working-directory: pytorch
+        run: |
+          git log -n 10 --graph | cat
+
+      - name: Install torch dependencies
+        working-directory: pytorch
+        run: |
+          pip install -r requirements.txt
+
+      - name: Build torch
+        working-directory: pytorch
+        run: |
+          python setup.py build bdist_wheel
+
+      - name: List distribution package
+        id: list-dist
+        working-directory: pytorch/dist
+        run: |
+          dist_name=$(ls torch*.whl)
+          dist_path=$(pwd)/${dist_name}
+          echo "dist-name=${dist_name}" >> $GITHUB_OUTPUT
+          echo "dist-path=${dist_path}" >> $GITHUB_OUTPUT
+
+      - name: Upload distribution artifact
+        id: upload-dist
+        continue-on-error: true
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.list-dist.outputs.dist-name }}
+          path: ${{ steps.list-dist.outputs.dist-path }}
+          if-no-files-found: error
+          retention-days: 1
+          overwrite: true
+
+      - name: Write to workflow job summary
+        if: ${{ steps.upload-dist.outputs.artifact-url }}
+        run: |
+          echo "## torch built successfully! :rocket:" >> $GITHUB_STEP_SUMMARY
+          echo "You can download the distribution package [here](${{ steps.upload-dist.outputs.artifact-url }})." >> $GITHUB_STEP_SUMMARY
diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build_torch_npu.yml
similarity index 72%
rename from .github/workflows/_ascend_npu_build.yml
rename to .github/workflows/_ascend_npu_build_torch_npu.yml
index 1f2ee52..64b4159 100644
--- a/.github/workflows/_ascend_npu_build.yml
+++ b/.github/workflows/_ascend_npu_build_torch_npu.yml
@@ -11,10 +11,14 @@ on:
         required: true
         type: string
         description: 'The docker image which will be used to build'
+      torch-artifact:
+        required: false
+        type: string
+        description: 'The distribution artifact name of torch'
     outputs:
-      artifact_name:
-        description: 'The torch_npu distribution artifact name'
-        value: ${{ jobs.build.outputs.dist_name }}
+      torch-npu-artifact:
+        description: 'The distribution artifact name of torch_npu'
+        value: ${{ jobs.build.outputs.dist-name }}
 
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -37,14 +41,13 @@ jobs:
         ALL_PROXY: socks5://127.0.0.1:10808
         SOCKS_PROXY: socks5://127.0.0.1:10808
     outputs:
-      dist_name: ${{ steps.list-dist.outputs.dist_name }}
+      dist-name: ${{ steps.list-dist.outputs.dist-name }}
     steps:
       - name: Config mirrors
         run: |
           sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
           pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
 
-      # TODO(shink): Should we add these dependencies to the image?
       - name: Install system dependencies
         run: |
           apt-get update
@@ -69,23 +72,41 @@ jobs:
           submodules: recursive
           path: torch_npu
 
-      - name: Install pip dependencies
+      - name: Install torch_npu dependencies
         working-directory: torch_npu
         run: |
           pip install wheel
           pip install -r requirements.txt
 
+      # TODO
+      # - name: Install torch
+      #   id: install-torch
+      #   uses: ./.github/actions/install-artifact
+      #   with:
+      #     artifact: ${{ inputs.torch-artifact }}
+
+      - name: Download torch artifact
+        if: ${{ inputs.torch-artifact }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.torch-artifact }}
+
+      - name: Install torch
+        if: ${{ inputs.torch-artifact }}
+        run: |
+          pip install ${{ inputs.torch-artifact }}
+
       - name: List Python version
         id: list-py-version
         working-directory: torch_npu
         run: |
           py_version=$(python --version | awk '{print $2}' | cut -d '.' -f 1,2)
-          echo "py_version=${py_version}" >> $GITHUB_OUTPUT
+          echo "py-version=${py_version}" >> $GITHUB_OUTPUT
 
       - name: Build torch_npu
         working-directory: torch_npu
         run: |
-          bash ci/build.sh --python=${{ steps.list-py-version.outputs.py_version }}
+          bash ci/build.sh --python=${{ steps.list-py-version.outputs.py-version }}
 
       - name: List distribution package
         id: list-dist
@@ -93,16 +114,16 @@ jobs:
         run: |
           dist_name=$(ls torch_npu*.whl)
           dist_path=$(pwd)/${dist_name}
-          echo "dist_name=${dist_name}" >> $GITHUB_OUTPUT
-          echo "dist_path=${dist_path}" >> $GITHUB_OUTPUT
+          echo "dist-name=${dist_name}" >> $GITHUB_OUTPUT
+          echo "dist-path=${dist_path}" >> $GITHUB_OUTPUT
 
       - name: Upload distribution artifact
         id: upload-dist
         continue-on-error: true
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ steps.list-dist.outputs.dist_name }}
-          path: ${{ steps.list-dist.outputs.dist_path }}
+          name: ${{ steps.list-dist.outputs.dist-name }}
+          path: ${{ steps.list-dist.outputs.dist-path }}
           if-no-files-found: error
           retention-days: 1
           overwrite: true
diff --git a/.github/workflows/_ascend_npu_ut.yml b/.github/workflows/_ascend_npu_ut.yml
index 22d78be..c11cfb8 100644
--- a/.github/workflows/_ascend_npu_ut.yml
+++ b/.github/workflows/_ascend_npu_ut.yml
@@ -15,10 +15,14 @@ on:
         required: true
         type: string
         description: 'The device selected to run on'
-      artifact_name:
+      torch-artifact:
+        required: false
+        type: string
+        description: 'The distribution artifact name of torch'
+      torch-npu-artifact:
         required: true
         type: string
-        description: 'The torch_npu distribution artifact name'
+        description: 'The distribution artifact name of torch_npu'
 
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -83,23 +87,49 @@ jobs:
           ref: feat/autoload
           path: torch_npu
 
-      - name: Download distribution artifact
-        uses: actions/download-artifact@v4
-        with:
-          name: ${{ inputs.artifact_name }}
-          path: torch_npu
-
       - name: Install pip dependencies
         working-directory: torch_npu
         run: |
-          pip install wheel unittest-xml-reporting
+          pip install wheel unittest-xml-reporting tabulate
           pip install -r requirements.txt
           pip install -r test/requirements.txt --no-deps
 
+      # TODO
+      # - name: Install torch
+      #   id: install-torch
+      #   uses: ./.github/actions/install-artifact
+      #   with:
+      #     artifact: ${{ inputs.torch-artifact }}
+
+      - name: Download torch artifact
+        if: ${{ inputs.torch-artifact }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.torch-artifact }}
+
+      - name: Install torch
+        if: ${{ inputs.torch-artifact }}
+        run: |
+          pip install ${{ inputs.torch-artifact }}
+
+      - name: Download torch_npu artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.torch-npu-artifact }}
+          path: torch_npu
+
       - name: Install torch_npu
         working-directory: torch_npu
         run: |
-          pip install ${{ inputs.artifact_name }}
+          pip install ${{ inputs.torch-npu-artifact }}
+
+      - name: Show environment info
+        run: |
+          npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
+          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
+          echo "NPU is available: ${npu_is_available}"
+          echo "NPU count: ${npu_count}"
+          pip list | grep -E 'torch|numpy'
 
       # TODO(shink): Skip
       - name: Do the test
diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
index da6c8d4..db9b1f9 100644
--- a/.github/workflows/ascend_npu_test.yml
+++ b/.github/workflows/ascend_npu_test.yml
@@ -1,4 +1,4 @@
-name: Ascend NPU Test Suite
+name: 'Ascend NPU Test Suite'
 
 on:
   push:
@@ -6,7 +6,8 @@ on:
       - 'main'
     paths:
       - '.github/workflows/ascend_npu_test.yml'
-      - '.github/workflows/_ascend_npu_build.yml'
+      - '.github/workflows/_ascend_npu_build_torch.yml'
+      - '.github/workflows/_ascend_npu_build_torch_npu.yml'
       - '.github/workflows/_ascend_npu_ut.yml'
       - '.github/workflows/_ascend_npu_benchmark.yml'
       - '.github/actions/**'
@@ -19,7 +20,8 @@ on:
       - 'main'
     paths:
       - '.github/workflows/ascend_npu_test.yml'
-      - '.github/workflows/_ascend_npu_build.yml'
+      - '.github/workflows/_ascend_npu_build_torch.yml'
+      - '.github/workflows/_ascend_npu_build_torch_npu.yml'
       - '.github/workflows/_ascend_npu_ut.yml'
       - '.github/workflows/_ascend_npu_benchmark.yml'
       - '.github/actions/**'
@@ -69,60 +71,89 @@ on:
         default: '/dev/davinci5'
         description: 'The device selected to run on'
 
-# Only cancel the previous runs when triggered by a pull request
+# Only cancel the previous runs when triggered by a pull_request event or a repository_dispatch event
 concurrency:
   group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
-  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' || github.event_name == 'repository_dispatch' }}
 
 jobs:
   prepare:
     name: Prepare
     runs-on: ubuntu-latest
     outputs:
-      runner: ${{ steps.set-param.outputs.runner }}
-      image: ${{ steps.set-param.outputs.image }}
-      device: ${{ steps.set-param.outputs.device }}
+      runner: ${{ steps.set-env.outputs.runner }}
+      image: ${{ steps.set-env.outputs.image }}
+      device: ${{ steps.set-env.outputs.device }}
+      ref: ${{ steps.list-ref.outputs.ref }}
     steps:
-      - name: Set param
-        id: set-param
+      - name: Set environment params
+        id: set-env
         run: |
-          set -e
           echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT
           echo "device=${{ github.event.inputs.device || '/dev/davinci5' }}" >> $GITHUB_OUTPUT
           echo "image=${{ github.event.inputs.image || 'ascendai/cann:latest' }}" >> $GITHUB_OUTPUT
 
+      # TODO(shink): List ghstack PR's ref
+      - name: List ref to the PyTorch branch
+        id: list-ref
+        if: ${{ github.event_name == 'repository_dispatch' }}
+        run: |
+          echo "ref=refs/pull/${{ github.event.client_payload.pull_request.number }}/merge" >> $GITHUB_OUTPUT
+
+  build-torch:
+    name: Build torch
+    needs:
+      - prepare
+    if: ${{ needs.prepare.outputs.ref }}
+    uses: ./.github/workflows/_ascend_npu_build_torch.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      ref: ${{ needs.prepare.outputs.ref }}
+
   build:
     name: Build torch_npu
     needs:
       - prepare
-    uses: ./.github/workflows/_ascend_npu_build.yml
+      - build-torch
+    # Run when every dependency succeeded, or when prepare succeeded and only the optional torch build was skipped
+    if: ${{ !cancelled() && (success() || (needs.prepare.result == 'success' && needs.build-torch.result == 'skipped')) }}
+    uses: ./.github/workflows/_ascend_npu_build_torch_npu.yml
     with:
       runner: ${{ needs.prepare.outputs.runner }}
       image: ${{ needs.prepare.outputs.image }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
 
   test:
     name: Test torch_npu
     needs:
       - prepare
+      - build-torch
       - build
+    if: ${{ !cancelled() && (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success')) }}
     uses: ./.github/workflows/_ascend_npu_ut.yml
     with:
       runner: ${{ needs.prepare.outputs.runner }}
       image: ${{ needs.prepare.outputs.image }}
       device: ${{ needs.prepare.outputs.device }}
-      artifact_name: ${{ needs.build.outputs.artifact_name }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
 
   benchmark:
     name: Run benchmarks
     needs:
       - prepare
+      - build-torch
       - build
       - test
+    # The unit tests must have passed in both paths, including the one where the torch build was skipped
+    if: ${{ !cancelled() && (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success' && needs.test.result == 'success')) }}
     uses: ./.github/workflows/_ascend_npu_benchmark.yml
     with:
       runner: ${{ needs.prepare.outputs.runner }}
       image: ${{ needs.prepare.outputs.image }}
       device: ${{ needs.prepare.outputs.device }}
-      artifact_name: ${{ needs.build.outputs.artifact_name }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
     secrets:
       pr-token: ${{ secrets.COSDT_BOT_TOKEN }}