Skip to content

Commit 130ae30

Browse files
committed
Add the torchtitan workflow
1 parent 581ba07 commit 130ae30

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Reusable workflow: runs the torchtitan test suites inside a container on a
# caller-selected runner.
name: '_ascend_npu_torchtitan'

on:
  workflow_call:
    inputs:
      # Label of the runner the job is scheduled on.
      runner:
        description: 'The runner selected to run on'
        type: string
        required: true
      # Container image the job runs in.
      image:
        description: 'The docker image which will be loaded'
        type: string
        required: true
      # Optional: when empty, the torch download/install steps are skipped.
      torch-artifact:
        description: 'The distribution artifact name of torch'
        type: string
        required: false
      # Artifact that is downloaded and pip-installed as torch_npu.
      torch-npu-artifact:
        description: 'The distribution artifact name of torch_npu'
        type: string
        required: true
22+
23+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
24+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
25+
# It's used to activate ascend-toolkit environment variables.
26+
27+
jobs:
  # Single job: prepares the container, installs torch / torch_npu from the
  # provided artifacts, then runs the torchtitan integration and unit tests.
  #
  # NOTE(review): the comment above this block says steps that need the
  # ascend-toolkit environment must declare "shell: bash -el {0}", but no step
  # here does so and no job-level default is set - confirm whether the image
  # already exports that environment.
  setup_environment:
    name: run torchtitan tests
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      env:
        HF_ENDPOINT: https://hf-mirror.com
    outputs:
      # Must match the id of the "List torch version" step below.
      torch_version: ${{ steps.list-torch-version.outputs.torch-version }}
      # The check_npu step only publishes `npu_count`; expose that value under
      # the existing output name so the output is no longer always empty.
      npu_info: ${{ steps.check_npu.outputs.npu_count }}
    steps:
      - name: Show NPU info
        run: |
          npu-smi info

      # Point apt and pip at the TUNA mirrors.
      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build curl \
            libgl1 libglib2.0-0 libsndfile1

      # Mark the workspace as safe and route GitHub clones through the proxy.
      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout
        uses: actions/checkout@v4

      # torchtitan is checked out into ./torchtitan; the test steps run there.
      - name: Checkout benchmark
        uses: actions/checkout@v4
        with:
          repository: pytorch/torchtitan
          path: torchtitan

      # The two torch steps only run when a torch artifact name was supplied.
      - name: Download torch artifact
        if: ${{ inputs.torch-artifact }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-artifact }}

      - name: Install torch
        if: ${{ inputs.torch-artifact }}
        run: |
          pip install ${{ inputs.torch-artifact }}

      # Without a torch artifact, install from the Ascend/pytorch requirements.
      - name: Install torch_npu dependencies
        if: ${{ !inputs.torch-artifact }}
        run: |
          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt

      - name: List torch version
        id: list-torch-version
        shell: bash
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"

      - name: Download torch_npu artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-npu-artifact }}
          path: ascend_npu

      - name: Install torch_npu
        working-directory: ascend_npu
        run: |
          pip install ${{ inputs.torch-npu-artifact }}

      - name: Install project dependencies
        run: |
          pip install pytest pytest-cov tyro tabulate

      # Publishes `npu_count`, which the integration test step passes to --ngpu.
      - name: Show environment info
        id: check_npu
        run: |
          npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
          echo "npu_count=${npu_count}" >> "$GITHUB_OUTPUT"
          echo "NPU is available: ${npu_is_available}"
          echo "NPU count: ${npu_count}"
          pip list | grep -E 'torch|numpy'

      # Best-effort step: a failure is reported on the step but must not stop
      # the unit tests from running, hence continue-on-error rather than
      # hiding the exit code with `|| true`.
      # NOTE(review): `artifacts-to-be-uploaded` is created as a directory and
      # then passed as --test_name; it looks like it was meant to be the output
      # directory argument - confirm against the torchtitan run_tests CLI.
      - name: Run torchtitan integration_test
        working-directory: torchtitan
        continue-on-error: true
        run: |
          mkdir -p artifacts-to-be-uploaded
          python -m tests.integration_tests.run_tests --test_name artifacts-to-be-uploaded --ngpu ${{ steps.check_npu.outputs.npu_count }}

      - name: Run torchtitan unittest
        working-directory: torchtitan
        run: |
          pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv

.github/workflows/ascend_npu_test.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,21 @@ jobs:
168168
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
169169
secrets:
170170
hf-token: ${{ secrets.HF_TOKEN }}
171+
172+
# Calls the reusable torchtitan workflow once the build jobs have finished.
torchtitan:
  name: Run torchtitan tests
  needs:
    - prepare
    - build-torch
    - build
  # Run unless cancelled or triggered by repository_dispatch, and only when
  # all needed jobs succeeded, or build-torch was skipped while build succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  uses: ./.github/workflows/_ascend_npu_torchtitan.yml
  with:
    runner: ${{ needs.prepare.outputs.runner }}
    image: ${{ needs.prepare.outputs.image }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
  # No `secrets:` mapping: the called workflow declares no secrets under
  # workflow_call and never reads `secrets.*`, and passing an undeclared
  # secret makes the workflow fail validation.

0 commit comments

Comments
 (0)