Skip to content

Commit 53a8d96

Browse files
committed
Add workflow for torchtune
1 parent cfcc43b commit 53a8d96

File tree

2 files changed

+181
-0
lines changed

2 files changed

+181
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Reusable workflow: installs torchtune and a torch_npu build inside a
# container, then runs torchtune fine-tuning recipes.
name: "_ascend_npu_torchtune"

on:
  workflow_call:
    inputs:
      # Label passed to `runs-on` of the job below.
      runner:
        required: true
        type: string
        description: "The runner selected to run on"
      # Container image the job's steps execute in.
      image:
        required: true
        type: string
        description: "The docker image which will be loaded"
      # Required by the interface, but no step in this workflow reads it.
      device:
        required: true
        type: string
        description: "The device selected to run on"
      # Optional. When set, the artifact is downloaded and pip-installed;
      # when empty, the torch_npu requirements file is installed instead.
      torch-artifact:
        required: false
        type: string
        description: "The distribution artifact name of torch"
      # Artifact that is downloaded into `ascend_npu/` and pip-installed.
      torch-npu-artifact:
        required: true
        type: string
        description: "The distribution artifact name of torch_npu"
26+
27+
# Default shell for every `run` step: bash as a login shell (-l) that
# aborts on the first failing command (-e).
defaults:
  run:
    shell: bash -el {0}
30+
31+
jobs:
  # Single job: prepare the container, install torchtune, torch and torch_npu,
  # fetch a small Qwen2.5 checkpoint, then run two single-device recipes.
  torchtune:
    name: run torchtune for torch_npu
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      env:
        # Hugging Face mirror used by the model download step below.
        HF_ENDPOINT: https://hf-mirror.com

    steps:
      # Prints the NPU status as seen from inside the container.
      - name: Show NPU info
        run: |
          npu-smi info

      # Point apt and pip at the TUNA mirrors.
      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build curl \
            libgl1 libglib2.0-0 libsndfile1

      # See: https://github.com/actions/checkout/issues/363#issuecomment-1915075699
      # See: https://github.com/hunshcn/gh-proxy/issues/28#issuecomment-773769630
      # Marks the workspace as a safe directory and rewrites github.com URLs
      # to go through the gh-proxy endpoint.
      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout
        uses: actions/checkout@v4

      # No `ref` is given, so this checks out the default branch of
      # pytorch/torchtune into `torchtune/`.
      - name: Checkout torchtune
        uses: actions/checkout@v4
        with:
          repository: pytorch/torchtune
          path: torchtune

      # Editable install from the checkout above.
      - name: Install torchtune
        working-directory: torchtune
        run: |
          pip install -e .

      # The two torch steps run only when a torch artifact name was provided.
      - name: Download torch artifact
        if: ${{ inputs.torch-artifact }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-artifact }}

      # The artifact name is passed to pip as the thing to install, so it
      # presumably equals the wheel's file name -- confirm with the producer.
      - name: Install torch
        if: ${{ inputs.torch-artifact }}
        run: |
          pip install ${{ inputs.torch-artifact }}

      # Fallback when no torch artifact is provided: install the requirements
      # listed by Ascend/pytorch's master branch.
      - name: Install torch_npu dependencies
        if: ${{ !inputs.torch-artifact }}
        run: |
          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt

      # Publishes the installed torch version as the `torch-version` step
      # output. This step overrides the default shell with plain `bash`.
      # The output is not read by any later step in this job.
      - name: List torch version
        id: list-torch-version
        shell: bash
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"

      - name: Download torch_npu artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-npu-artifact }}
          path: ascend_npu

      # As with torch, the artifact name doubles as the install target,
      # resolved relative to `ascend_npu/`.
      - name: Install torch_npu
        working-directory: ascend_npu
        run: |
          pip install ${{ inputs.torch-npu-artifact }}

      - name: Show environment info
        run: |
          pip list

      # HF_ENDPOINT is already provided by the `env` block above, so it is not
      # exported again here. The command no longer ends in a dangling `\`.
      # NOTE(review): confirm /tmp/Qwen2.5-0.5B-Instruct is the checkpoint
      # directory the qwen2_5 recipe configs below expect.
      - name: Download Qwen2.5 model
        run: |
          huggingface-cli download --resume-download Qwen/Qwen2.5-0.5B-Instruct \
            --local-dir /tmp/Qwen2.5-0.5B-Instruct

      # NOTE(review): no `device=` override is passed to either recipe;
      # confirm the stock configs select the NPU on these runners.
      - name: Run torchtune with lora finetune
        run: |
          tune run lora_finetune_single_device --config qwen2_5/0.5B_lora_single_device

      - name: Run torchtune with full finetune
        run: |
          tune run full_finetune_single_device --config qwen2_5/0.5B_full_single_device

.github/workflows/ascend_npu_test.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ on:
1111
- ".github/workflows/_ascend_npu_ut.yml"
1212
- ".github/workflows/_ascend_npu_benchmark.yml"
1313
- ".github/workflows/_ascend_npu_torchtitan.yml"
14+
- ".github/workflows/_ascend_npu_torchtune.yml"
1415
- ".ci/**"
1516
- "ascend_npu/**"
1617
- "src/**"
@@ -25,6 +26,7 @@ on:
2526
- ".github/workflows/_ascend_npu_ut.yml"
2627
- ".github/workflows/_ascend_npu_benchmark.yml"
2728
- ".github/workflows/_ascend_npu_torchtitan.yml"
29+
- ".github/workflows/_ascend_npu_torchtune.yml"
2830
- ".ci/**"
2931
- "ascend_npu/**"
3032
- "src/**"
@@ -120,6 +122,41 @@ jobs:
120122
image: ${{ needs.prepare.outputs.image }}
121123
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
122124

125+
# Calls the unit-test reusable workflow with the prepared runner/image/device
# and the torch / torch_npu artifacts from the build jobs.
test:
  name: Test torch_npu
  needs:
    - prepare
    - build-torch
    - build
  # Skipped when the run is cancelled or was triggered by repository_dispatch.
  # Otherwise runs when every needed job succeeded, or when build-torch was
  # skipped while build succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  uses: ./.github/workflows/_ascend_npu_ut.yml
  with:
    runner: ${{ needs.prepare.outputs.runner }}
    image: ${{ needs.prepare.outputs.image }}
    device: ${{ needs.prepare.outputs.device }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
141+
142+
# Calls the benchmark reusable workflow with the prepared runner/image and the
# torch / torch_npu artifacts, forwarding the bot token as `pr-token`.
benchmark:
  name: Run benchmarks
  needs:
    - prepare
    - build-torch
    - build
  # Skipped when the run is cancelled or was triggered by repository_dispatch.
  # Otherwise runs when every needed job succeeded, or when build-torch was
  # skipped while build succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  uses: ./.github/workflows/_ascend_npu_benchmark.yml
  with:
    runner: ${{ needs.prepare.outputs.runner }}
    image: ${{ needs.prepare.outputs.image }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
  secrets:
    pr-token: ${{ secrets.COSDT_BOT_TOKEN }}
159+
123160
torchtitan:
124161
name: Run torchtitan
125162
needs:
@@ -136,3 +173,19 @@ jobs:
136173
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
137174
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
138175

176+
# Calls the torchtune reusable workflow with the prepared runner/image/device
# and the torch / torch_npu artifacts from the build jobs.
torchtune:
  name: Run torchtune for torch_npu
  needs:
    - prepare
    - build-torch
    - build
  # Skipped when the run is cancelled or was triggered by repository_dispatch.
  # Otherwise runs when every needed job succeeded, or when build-torch was
  # skipped while build succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  uses: ./.github/workflows/_ascend_npu_torchtune.yml
  with:
    runner: ${{ needs.prepare.outputs.runner }}
    image: ${{ needs.prepare.outputs.image }}
    # `device` is declared `required: true` by _ascend_npu_torchtune.yml, so
    # it must be supplied here; same source as the `test` job uses.
    device: ${{ needs.prepare.outputs.device }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
191+

0 commit comments

Comments
 (0)