Skip to content

Commit a2ba986

Browse files
authored
NPU add titan test (#55)
1 parent 98c40e7 commit a2ba986

File tree

2 files changed

+142
-20
lines changed

2 files changed

+142
-20
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
name: "_ascend_npu_torchtitan"

# Reusable workflow: it only runs when another workflow calls it with `uses:`.
on:
  workflow_call:
    inputs:
      runner:
        required: true
        type: string
        description: "The runner selected to run on"
      image:
        required: true
        type: string
        description: "The docker image which will be loaded"
      device:
        # Optional: no step in this workflow reads `inputs.device`, and a
        # required input that the caller omits fails workflow validation.
        required: false
        type: string
        description: "The device selected to run on"
      torch-artifact:
        # Optional: when empty, the torch download/install steps are skipped.
        required: false
        type: string
        description: "The distribution artifact name of torch"
      torch-npu-artifact:
        required: true
        type: string
        description: "The distribution artifact name of torch_npu"
    secrets:
      pr-token:
        description: "A token used to create a pull request"
        # Optional: no step in this workflow uses the token, and a required
        # secret that the caller omits fails workflow validation.
        required: false
30+
31+
# Every `run:` step uses a bash login shell unless the step sets its own
# `shell:`. Plain bash steps do not read ~/.profile or ~/.bashrc; the `-l`
# flag makes bash source the login profile, which is what activates the
# ascend-toolkit environment variables. `-e` aborts the script at the first
# failing command.

defaults:
  run:
    shell: 'bash -el {0}'
38+
39+
jobs:
  # Single job: prepares the container, installs torch / torch_npu and runs
  # the torchtitan integration and unit tests on the selected runner.
  setup_environment:
    name: run torchtitan tests
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      env:
        HF_ENDPOINT: https://hf-mirror.com
    outputs:
      # Each output must reference the `id` of the step that writes it to
      # $GITHUB_OUTPUT; a reference to an unknown step id evaluates to an
      # empty string.
      torch_version: ${{ steps.list-torch-version.outputs.torch-version }}
      npu_info: ${{ steps.check_npu.outputs.npu_info }}
    steps:
      - name: Show NPU info
        id: check_npu
        run: |
          npu_info="$(npu-smi info)"
          echo "${npu_info}"
          # Multi-line values must use the delimiter form of $GITHUB_OUTPUT.
          {
            echo "npu_info<<NPU_INFO_EOF"
            echo "${npu_info}"
            echo "NPU_INFO_EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build curl \
            libgl1 libglib2.0-0 libsndfile1

      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout
        uses: actions/checkout@v4

      # torchtitan is checked out into ./torchtitan next to this repository.
      - name: Checkout torchtitan
        uses: actions/checkout@v4
        with:
          repository: pytorch/torchtitan
          path: torchtitan

      # The two torch steps run only when the caller supplied a torch artifact.
      - name: Download torch artifact
        if: ${{ inputs.torch-artifact }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-artifact }}

      # NOTE(review): the artifact name is handed to pip as-is, so it
      # presumably doubles as the wheel file name; confirm with the build job.
      - name: Install torch
        if: ${{ inputs.torch-artifact }}
        run: |
          pip install ${{ inputs.torch-artifact }}

      # Without a torch artifact, install the requirements published by
      # Ascend/pytorch instead.
      - name: Install torch_npu dependencies
        if: ${{ !inputs.torch-artifact }}
        run: |
          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt

      # Publishes the `torch-version` step output consumed by the job-level
      # `torch_version` output above. This step overrides the default shell.
      - name: List torch version
        id: list-torch-version
        shell: bash
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"

      - name: Download torch_npu artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-npu-artifact }}
          path: ascend_npu

      - name: Install torch_npu
        working-directory: ascend_npu
        run: |
          pip install ${{ inputs.torch-npu-artifact }}

      # NOTE(review): this step runs in the workspace root, so it installs
      # this repository's requirements.txt, not torchtitan/requirements.txt;
      # confirm that is intended.
      - name: Install project dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest pytest-cov tyro

      - name: Show environment info
        run: |
          npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
          echo "NPU is available: ${npu_is_available}"
          echo "NPU count: ${npu_count}"
          pip list | grep -E 'torch|numpy'

      # The integration tests are launched with one worker per visible NPU.
      - name: Run torchtitan integration_test
        working-directory: torchtitan
        run: |
          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
          python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu "${npu_count}"

      - name: Run torchtitan unittest
        working-directory: torchtitan
        run: |
          pytest ./tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv

.github/workflows/ascend_npu_test.yml

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ on:
1010
- ".github/workflows/_ascend_npu_build_torch_npu.yml"
1111
- ".github/workflows/_ascend_npu_ut.yml"
1212
- ".github/workflows/_ascend_npu_benchmark.yml"
13+
- ".github/workflows/_ascend_npu_torchtitan.yml"
1314
- ".ci/**"
1415
- "ascend_npu/**"
1516
- "src/**"
@@ -23,6 +24,7 @@ on:
2324
- ".github/workflows/_ascend_npu_build_torch_npu.yml"
2425
- ".github/workflows/_ascend_npu_ut.yml"
2526
- ".github/workflows/_ascend_npu_benchmark.yml"
27+
- ".github/workflows/_ascend_npu_torchtitan.yml"
2628
- ".ci/**"
2729
- "ascend_npu/**"
2830
- "src/**"
@@ -118,36 +120,19 @@ jobs:
118120
image: ${{ needs.prepare.outputs.image }}
119121
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
120122

121-
test:
122-
name: Test torch_npu
123+
torchtitan:
124+
name: Run torchtitan
123125
needs:
124126
- prepare
125127
- build-torch
126128
- build
127129
if: |
128130
!cancelled() && github.event_name != 'repository_dispatch' &&
129131
(success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
130-
uses: ./.github/workflows/_ascend_npu_ut.yml
132+
uses: ./.github/workflows/_ascend_npu_torchtitan.yml
131133
with:
132134
runner: ${{ needs.prepare.outputs.runner }}
133135
image: ${{ needs.prepare.outputs.image }}
134136
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
135137
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
136138

137-
benchmark:
138-
name: Run benchmarks
139-
needs:
140-
- prepare
141-
- build-torch
142-
- build
143-
if: |
144-
!cancelled() && github.event_name != 'repository_dispatch' &&
145-
(success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
146-
uses: ./.github/workflows/_ascend_npu_benchmark.yml
147-
with:
148-
runner: ${{ needs.prepare.outputs.runner }}
149-
image: ${{ needs.prepare.outputs.image }}
150-
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
151-
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
152-
secrets:
153-
pr-token: ${{ secrets.COSDT_BOT_TOKEN }}

0 commit comments

Comments
 (0)