Add torchbenchmark (#11)

shink · web-flow · commit 1f1a036f78a3 · 2024-11-30T09:38:10.000+08:00
diff --git a/.ci/benchmark.py b/.ci/benchmark.py
@@ -0,0 +1,15 @@
+import os
+import sys
+from src.benchmark.utils import read_metrics, to_markdown_table
+
+if __name__ == "__main__":
+    # Build a Markdown table of "accuracy" metrics from the file passed as argv[1]
+    statistics_path = sys.argv[1]
+    metrics = read_metrics(statistics_path, metric="accuracy")
+    markdown_table = to_markdown_table(metrics)
+
+    # Append to the job summary; explicit UTF-8 avoids depending on the locale
+    summary_path = os.environ["GITHUB_STEP_SUMMARY"]
+    with open(summary_path, "a", encoding="utf-8") as f:
+        f.write("## Torchbenchmark statistics report\n")
+        f.write(markdown_table)
diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml
@@ -0,0 +1,148 @@
+name: '_ascend_npu_benchmark'
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        required: true
+        type: string
+        description: 'The runner selected to run on'
+      image:
+        required: true
+        type: string
+        description: 'The docker image which will be loaded'
+      device:
+        required: true
+        type: string
+        description: 'The device selected to run on'
+      artifact_name:
+        required: true
+        type: string
+        description: 'The torch_npu distribution artifact name'
+
+# Non-login bash shells do not read ~/.profile or ~/.bashrc, so every "run" step
+# defaults to "shell: bash -el {0}" (login shell, exit on first error).
+# The login shell is what activates the ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  test:
+    name: run benchmarks for torch_npu
+    runs-on: ${{ inputs.runner }}
+    container:
+      image: ${{ inputs.image }}
+      volumes:
+        - /usr/local/dcmi:/usr/local/dcmi
+        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
+        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
+        - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
+        - /etc/ascend_install.info:/etc/ascend_install.info
+      options: >-
+        --network host
+        --device ${{ inputs.device }}
+        --device /dev/davinci_manager
+        --device /dev/devmm_svm
+        --device /dev/hisi_hdc
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
+    steps:
+      - name: Show NPU info
+        run: |
+          npu-smi info
+
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt update
+          apt install --no-install-recommends -y \
+              git gcc g++ make cmake ninja-build curl \
+              libgl1 libglib2.0-0 libsndfile1
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # TODO(shink): Update once PR merged
+      # https://github.com/pytorch/benchmark/pull/2550
+      - name: Checkout benchmark
+        uses: actions/checkout@v4
+        with:
+          repository: shink/benchmark
+          ref: feat/test_bench/continue_on_error
+          path: benchmark
+
+      - name: Download ${{ inputs.artifact_name }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.artifact_name }}
+          path: ascend_npu
+
+      - name: Install torch_npu
+        working-directory: ascend_npu
+        run: |
+          pip install ${{ inputs.artifact_name }}
+
+      - name: Install torch_npu dependencies
+        working-directory: ascend_npu
+        run: |
+          curl -fsSL -O https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt
+          pip install -r requirements.txt
+
+      - name: Install benchmark dependencies
+        run: |
+          pip install -r benchmark/requirements.txt --constraint ascend_npu/requirements.txt "numpy==1.*"
+          python benchmark/install.py --userbenchmark test_bench --continue_on_fail
+
+      - name: Install project dependencies
+        run: |
+          pip install -r requirements.txt
+
+      - name: Show environment info
+        run: |
+          npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
+          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
+          echo "NPU is available: ${npu_is_available}"
+          echo "NPU count: ${npu_count}"
+          pip list | grep -E 'torch|numpy'
+
+      - name: Run benchmarks
+        working-directory: benchmark
+        run: |
+          python run_benchmark.py test_bench --accuracy --device npu --test eval \
+              --output ascend_npu_benchmark.json
+
+      - name: Upload the output file
+        id: upload-output
+        uses: actions/upload-artifact@v4
+        with:
+          name: ascend_npu_benchmark.json
+          path: benchmark/ascend_npu_benchmark.json
+          if-no-files-found: error
+          retention-days: 1
+          overwrite: true
+
+      - name: Write to workflow job summary
+        id: report
+        run: |
+          set -x
+          realpath benchmark/ascend_npu_benchmark.json
+          ls benchmark
+          cat benchmark/ascend_npu_benchmark.json
+          # Pass the absolute path quoted so whitespace in it cannot split the argument
+          output_path=$(realpath benchmark/ascend_npu_benchmark.json)
+          python .ci/benchmark.py "${output_path}"
+
+      # TODO(shink): implement the README.md update; this step only echoes the event name for now
+      - name: Update README.md
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          echo "${{ github.event_name }}"
+          echo "${{ github.event_name == 'push' }}"
diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml
@@ -29,6 +29,13 @@ jobs:
     runs-on: ${{ inputs.runner }}
     container:
       image: ${{ inputs.image }}
+      options: >-
+        --network host
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
     outputs:
       dist_name: ${{ steps.list-dist.outputs.dist_name }}
     steps:
diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml
@@ -29,7 +29,7 @@ defaults:
 
 jobs:
   test:
-    name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }}
+    name: test torch_npu
     runs-on: ${{ inputs.runner }}
     container:
       image: ${{ inputs.image }}
@@ -45,6 +45,11 @@ jobs:
         --device /dev/davinci_manager
         --device /dev/devmm_svm
         --device /dev/hisi_hdc
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
     steps:
       - name: Show NPU info
         run: |
@@ -67,7 +72,6 @@ jobs:
           # repository: Ascend/pytorch
           repository: shink/torchnpu
           ref: feat/autoload
-          submodules: recursive
           path: torch_npu
 
       - name: Download distribution artifact
diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
@@ -48,7 +48,7 @@ on:
           - /dev/davinci6
           - /dev/davinci7
           - /dev/davinci8
-        default: '/dev/davinci6'
+        default: '/dev/davinci5'
         description: 'The device selected to run on'
 
 # Only cancel the previous runs when triggered by a pull request
@@ -70,7 +70,7 @@ jobs:
         run: |
           set -e
           echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT
-          echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT
+          echo "device=${{ github.event.inputs.device || '/dev/davinci5' }}" >> $GITHUB_OUTPUT
           echo "image=${{ github.event.inputs.image || 'ascendai/cann:latest' }}" >> $GITHUB_OUTPUT
 
   build:
@@ -93,3 +93,16 @@ jobs:
       image: ${{ needs.prepare.outputs.image }}
       device: ${{ needs.prepare.outputs.device }}
       artifact_name: ${{ needs.build.outputs.artifact_name }}
+
+  benchmark:
+    name: Run benchmarks
+    needs:
+      - prepare
+      - build
+      - test
+    uses: ./.github/workflows/_ascend_npu_benchmark.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      device: ${{ needs.prepare.outputs.device }}
+      artifact_name: ${{ needs.build.outputs.artifact_name }}
diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
 .idea
+.benchmarks
+__pycache__
diff --git a/README.md b/README.md
@@ -7,16 +7,122 @@ across various devices by running comprehensive GitHub workflows.
 
 ## Accelerator Integration Test Results
 
-| [torch_npu][1]                   |
-|----------------------------------|
-| [![Ascend NPU Test Suite][2]][3] | 
+<!-- Start -->
+
+|                                 | [torch_npu][1] |
+|---------------------------------|----------------|
+| simple_gpt                      | ❌              |
+| detectron2_fasterrcnn_r_50_dc5  | ❌              |
+| LearningToPaint                 | ✅              |
+| hf_GPT2_large                   | ✅              |
+| dcgan                           | ✅              |
+| nanogpt                         | ✅              |
+| fastNLP_Bert                    | ✅              |
+| moondream                       | ❌              |
+| mobilenet_v2_quantized_qat      | ❌              |
+| functorch_dp_cifar10            | ✅              |
+| simple_gpt_tp_manual            | ❌              |
+| speech_transformer              | ✅              |
+| yolov3                          | ✅              |
+| resnet50_quantized_qat          | ❌              |
+| sam_fast                        | ❌              |
+| alexnet                         | ✅              |
+| timm_efficientnet               | ✅              |
+| pyhpc_isoneutral_mixing         | ✅              |
+| basic_gnn_edgecnn               | ✅              |
+| nvidia_deeprecommender          | ❌              |
+| opacus_cifar10                  | ✅              |
+| dlrm                            | ✅              |
+| hf_Bert                         | ✅              |
+| hf_T5_generate                  | ✅              |
+| resnet50                        | ✅              |
+| hf_BigBird                      | ✅              |
+| resnext50_32x4d                 | ✅              |
+| pyhpc_turbulent_kinetic_energy  | ✅              |
+| llama                           | ✅              |
+| detectron2_maskrcnn_r_50_c4     | ❌              |
+| Super_SloMo                     | ✅              |
+| moco                            | ❌              |
+| stable_diffusion_unet           | ❌              |
+| microbench_unbacked_tolist_sum  | ✅              |
+| detectron2_maskrcnn_r_101_c4    | ❌              |
+| hf_distil_whisper               | ✅              |
+| mnasnet1_0                      | ✅              |
+| detectron2_fasterrcnn_r_50_fpn  | ❌              |
+| timm_resnest                    | ✅              |
+| hf_GPT2                         | ✅              |
+| squeezenet1_1                   | ✅              |
+| basic_gnn_gin                   | ✅              |
+| hf_clip                         | ✅              |
+| mobilenet_v2                    | ✅              |
+| drq                             | ✅              |
+| hf_Roberta_base                 | ✅              |
+| detectron2_maskrcnn_r_50_fpn    | ❌              |
+| timm_nfnet                      | ✅              |
+| timm_vovnet                     | ✅              |
+| doctr_det_predictor             | ✅              |
+| sam                             | ✅              |
+| hf_T5_large                     | ✅              |
+| mobilenet_v3_large              | ✅              |
+| detectron2_fcos_r_50_fpn        | ❌              |
+| soft_actor_critic               | ✅              |
+| llava                           | ❌              |
+| timm_regnet                     | ✅              |
+| functorch_maml_omniglot         | ✅              |
+| detectron2_fasterrcnn_r_101_c4  | ❌              |
+| hf_DistilBert                   | ✅              |
+| tts_angular                     | ✅              |
+| detectron2_maskrcnn             | ❌              |
+| basic_gnn_sage                  | ✅              |
+| tacotron2                       | ❌              |
+| detectron2_maskrcnn_r_101_fpn   | ❌              |
+| lennard_jones                   | ✅              |
+| pytorch_unet                    | ✅              |
+| vgg16                           | ✅              |
+| BERT_pytorch                    | ✅              |
+| timm_efficientdet               | ❌              |
+| pyhpc_equation_of_state         | ✅              |
+| maml                            | ✅              |
+| detectron2_fasterrcnn_r_50_c4   | ❌              |
+| resnet152                       | ✅              |
+| phlippe_densenet                | ✅              |
+| maml_omniglot                   | ✅              |
+| phlippe_resnet                  | ✅              |
+| pytorch_CycleGAN_and_pix2pix    | ✅              |
+| hf_Whisper                      | ✅              |
+| hf_T5                           | ✅              |
+| densenet121                     | ✅              |
+| cm3leon_generate                | ✅              |
+| detectron2_fasterrcnn_r_101_fpn | ❌              |
+| hf_Bert_large                   | ✅              |
+| stable_diffusion_text_encoder   | ❌              |
+| hf_Reformer                     | ❌              |
+| detectron2_fasterrcnn_r_101_dc5 | ❌              |
+| demucs                          | ✅              |
+| pytorch_stargan                 | ✅              |
+| hf_T5_base                      | ✅              |
+| torch_multimodal_clip           | ✅              |
+| vision_maskrcnn                 | ❌              |
+| timm_vision_transformer_large   | ✅              |
+| hf_Bart                         | ✅              |
+| shufflenet_v2_x1_0              | ✅              |
+| llama_v2_7b_16h                 | ❌              |
+| basic_gnn_gcn                   | ✅              |
+| resnet18                        | ✅              |
+| Background_Matting              | ✅              |
+| doctr_reco_predictor            | ✅              |
+| timm_vision_transformer         | ✅              |
+| hf_Albert                       | ✅              |
+| hf_Longformer                   | ✅              |
 
 [1]: https://github.com/ascend/pytorch
 
 [2]: https://github.com/cosdt/pytorch-integration-tests/actions/workflows/ascend_npu_test.yml/badge.svg
 
 [3]: https://github.com/cosdt/pytorch-integration-tests/actions/workflows/ascend_npu_test.yml
 
+<!-- End -->
+
 ## Overview
 
 This repository contains workflows and scripts that automate the testing
diff --git a/ascend_npu/README.md b/ascend_npu/README.md
@@ -0,0 +1,2 @@
+# Huawei Ascend NPU
+
diff --git a/ascend_npu/metadata.json b/ascend_npu/metadata.json
@@ -0,0 +1,11 @@
+{
+  "device": "npu",
+  "test": [
+    "train",
+    "eval"
+  ],
+  "models": [
+    "BERT_pytorch",
+    "hf_GPT2"
+  ]
+}
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+tabulate
diff --git a/src/benchmark/__init__.py b/src/benchmark/__init__.py
@@ -0,0 +1 @@
+from .utils import *
diff --git a/src/benchmark/utils.py b/src/benchmark/utils.py
diff --git a/test/benchmark/ascend_npu_benchmark.json b/test/benchmark/ascend_npu_benchmark.json
diff --git a/test/benchmark/test_ascend_npu.py b/test/benchmark/test_ascend_npu.py
diff --git a/test/gpt2_test.py b/test/gpt2_test.py
diff --git a/test/requirements.txt b/test/requirements.txt

Original file line number	Diff line number	Diff line change
`@@ -1 +1,3 @@`
`1`	`1`	`.idea`
	`2`	`+.benchmarks`
	`3`	`+__pycache__`