Skip to content

Commit b2718a9

Browse files
committed
NPU add titan test
1 parent cff188a commit b2718a9

File tree

2 files changed

+161
-1
lines changed

2 files changed

+161
-1
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
name: "_ascend_npu_torchtitan"
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
runner:
7+
required: true
8+
type: string
9+
description: "The runner selected to run on"
10+
image:
11+
required: true
12+
type: string
13+
description: "The docker image which will be loaded"
14+
device:
15+
required: true
16+
type: string
17+
description: "The device selected to run on"
18+
torch-artifact:
19+
required: false
20+
type: string
21+
description: "The distribution artifact name of torch"
22+
torch-npu-artifact:
23+
required: true
24+
type: string
25+
description: "The distribution artifact name of torch_npu"
26+
secrets:
27+
pr-token:
28+
description: "A token used to create a pull request"
29+
required: true
30+
31+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
32+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
33+
# It's used to activate ascend-toolkit environment variables.
34+
35+
defaults:
36+
run:
37+
shell: bash -el {0}
38+
39+
jobs:
40+
setup_environment:
41+
name: Run Torchtitan Environment
42+
runs-on: ${{ inputs.runner }}
43+
container:
44+
image: ${{ inputs.image }}
45+
env:
46+
HF_ENDPOINT: https://hf-mirror.com
47+
outputs:
48+
# The torch version is written to GITHUB_OUTPUT by the step whose id is "list-torch-version",
# so the job output must reference that exact step id.
torch_version: ${{ steps.list-torch-version.outputs.torch-version }}
49+
# NOTE(review): no step with id "check_npu" is visible in this workflow, so this output
# presumably always resolves to an empty string - add the step or drop the output; confirm.
npu_info: ${{ steps.check_npu.outputs.npu_info }}
50+
steps:
51+
- name: Show NPU info
52+
run: |
53+
npu-smi info
54+
55+
- name: Config mirrors
56+
run: |
57+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
58+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
59+
60+
- name: Install system dependencies
61+
run: |
62+
apt-get update
63+
apt-get install -y \
64+
git gcc g++ make cmake ninja-build curl \
65+
libgl1 libglib2.0-0 libsndfile1
66+
67+
- name: Config git
68+
run: |
69+
git config --global --add safe.directory "$GITHUB_WORKSPACE"
70+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
71+
72+
- name: Checkout
73+
uses: actions/checkout@v4
74+
75+
- name: Checkout benchmark
76+
uses: actions/checkout@v4
77+
with:
78+
repository: pytorch/torchtitan
79+
path: torchtitan
80+
81+
- name: Download torch artifact
82+
if: ${{ inputs.torch-artifact }}
83+
uses: actions/download-artifact@v4
84+
with:
85+
name: ${{ inputs.torch-artifact }}
86+
87+
- name: Install torch
88+
if: ${{ inputs.torch-artifact }}
89+
run: |
90+
pip install ${{ inputs.torch-artifact }}
91+
92+
- name: Install torch_npu dependencies
93+
if: ${{ !inputs.torch-artifact }}
94+
run: |
95+
pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt
96+
97+
- name: List torch version
98+
id: list-torch-version
99+
shell: bash
100+
run: |
101+
torch_version=$(python -c "import torch; print(torch.__version__)")
102+
echo "torch-version=${torch_version}" >> $GITHUB_OUTPUT
103+
104+
- name: Download torch_npu artifact
105+
uses: actions/download-artifact@v4
106+
with:
107+
name: ${{ inputs.torch-npu-artifact }}
108+
path: ascend_npu
109+
110+
- name: Install torch_npu
111+
working-directory: ascend_npu
112+
run: |
113+
pip install ${{ inputs.torch-npu-artifact }}
114+
115+
- name: Install nightly torchvision and torchaudio
116+
run: |
117+
pip install --pre torchvision torchaudio --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu
118+
119+
# Installs the packages listed in requirements.txt (resolved relative to the step's working
# directory, which is the workspace root here) and then the pytest / pytest-cov test tooling.
# NOTE(review): torchtitan is checked out under "torchtitan/"; if its own requirements.txt is
# the one intended, this step presumably needs "working-directory: torchtitan" - confirm.
- name: Install project dependencies
120+
run: |
121+
pip install -r requirements.txt
122+
pip install pytest pytest-cov
123+
124+
- name: Show environment info
125+
run: |
126+
npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
127+
npu_count=$(python -c "import torch; print(torch.npu.device_count())")
128+
echo "NPU is available: ${npu_is_available}"
129+
echo "NPU count: ${npu_count}"
130+
pip list | grep -E 'torch|numpy'
131+
132+
# Runs the torchtitan integration tests on every NPU that torch reports.
# The pytorch/torchtitan repository is checked out into the "torchtitan" sub-directory, so the
# step must run from there for "./tests/integration_tests.py" to resolve.
- name: Run torchtitan integration_test
working-directory: torchtitan
133+
run: |
134+
mkdir artifacts-to-be-uploaded
135+
npu_count=$(python -c "import torch; print(torch.npu.device_count())")
136+
python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu ${npu_count}
137+
138+
# Runs the torchtitan unit tests with coverage and reports the 20 slowest tests.
# The pytorch/torchtitan repository is checked out into the "torchtitan" sub-directory, so the
# step must run from there for "./tests/unit_tests" and "--cov=." to target the torchtitan tree.
- name: Run torchtitan unittest
working-directory: torchtitan
139+
run: |
140+
pytest ./tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv

.github/workflows/ascend_npu_test.yml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ on:
1010
- ".github/workflows/_ascend_npu_build_torch_npu.yml"
1111
- ".github/workflows/_ascend_npu_ut.yml"
1212
- ".github/workflows/_ascend_npu_benchmark.yml"
13+
- ".github/workflows/_ascend_npu_torchtitan.yml"
1314
- ".ci/**"
1415
- "ascend_npu/**"
1516
- "src/**"
@@ -23,6 +24,7 @@ on:
2324
- ".github/workflows/_ascend_npu_build_torch_npu.yml"
2425
- ".github/workflows/_ascend_npu_ut.yml"
2526
- ".github/workflows/_ascend_npu_benchmark.yml"
27+
- ".github/workflows/_ascend_npu_torchtitan.yml"
2628
- ".ci/**"
2729
- "ascend_npu/**"
2830
- "src/**"
@@ -157,7 +159,6 @@ jobs:
157159
- prepare
158160
- build-torch
159161
- build
160-
- test
161162
if: |
162163
!cancelled() && github.event_name != 'repository_dispatch' &&
163164
(success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
@@ -170,3 +171,22 @@ jobs:
170171
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
171172
secrets:
172173
pr-token: ${{ secrets.COSDT_BOT_TOKEN }}
174+
175+
torchtitan:
176+
name: Run torchtitan
177+
needs:
178+
- prepare
179+
- build-torch
180+
- build
181+
if: |
182+
!cancelled() && github.event_name != 'repository_dispatch' &&
183+
(success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
184+
uses: ./.github/workflows/_ascend_npu_torchtitan.yml
185+
with:
186+
runner: "linux-arm64-npu-2"
187+
image: ${{ needs.prepare.outputs.image }}
188+
device: ${{ needs.prepare.outputs.device }}
189+
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
190+
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
191+
secrets:
192+
pr-token: ${{ secrets.COSDT_BOT_TOKEN }}

0 commit comments

Comments
 (0)