Skip to content

Commit c71ea21

Browse files
authored
[HPU] Add initial hpu bridge build (#5)
Includes the following workflows: - HPU test trigger entry point - PyTorch CPU build - HPU bridge build - HPU UT test [fake] - HPU E2E test based on torchbench [fake]
1 parent 166c89f commit c71ea21

File tree

5 files changed

+881
-0
lines changed

5 files changed

+881
-0
lines changed

.github/workflows/_build_torch.yml

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
name: _build_torch

# Reusable workflow: it is only started through `workflow_call` from another
# workflow, which supplies the inputs below and may read the declared output.
on:
  workflow_call:
    inputs:
      runner:
        description: The runner selected to run on
        type: string
        required: true
      image:
        description: The docker image which will be used to build
        type: string
        required: true
      pr-number:
        description: The number of pull request
        type: string
        required: false
        default: ''
      # In this workflow `ref` only feeds the job's display name (used when
      # `pr-number` is empty); the PyTorch checkout step uses a fixed tag.
      ref:
        description: The branch, tag or SHA to checkout
        type: string
        required: false
        default: refs/heads/main
      # Must stay quoted: an unquoted 3.10 would be parsed as the float 3.1.
      python_version:
        description: The python version to use
        type: string
        required: false
        default: '3.10'
    outputs:
      # File name of the built wheel, which is also the uploaded artifact name.
      torch-artifact:
        description: The distribution artifact name of torch
        value: ${{ jobs.build.outputs.dist-name }}
33+
34+
# Every `run` step goes through `bash -el {0}`: a login shell (`-l`) that
# stops at the first failing command (`-e`). Plain bash steps do not read
# ~/.profile or ~/.bashrc, so the login shell is what gets the profile-provided
# environment activated.
# NOTE(review): the previous comment said this activates "ascend-toolkit"
# environment variables, which looks like a leftover from another backend's
# workflow; confirm which environment setup this workflow actually relies on.
defaults:
  run:
    shell: 'bash -el {0}'
40+
41+
jobs:
  # Builds a torch wheel from a pinned PyTorch tag inside the requested
  # container image, installs it as a smoke check, and uploads it as a
  # short-lived workflow artifact whose name is exposed as `dist-name`.
  build:
    name: build torch for ${{ inputs.pr-number && format('#{0}', inputs.pr-number) || inputs.ref }}
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
    outputs:
      dist-name: ${{ steps.list-dist.outputs.dist-name }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python environment
        uses: actions/setup-python@v5
        id: setup-python
        with:
          python-version: ${{ inputs.python_version }}

      # Exports CPATH for all later steps so the compiler can find the Python
      # headers; `pythonLocation` is expected to be set by setup-python.
      - name: Setup CPATH
        run: |
          echo "python version : ${{ steps.setup-python.outputs.python-version }}"
          python_version="${{ inputs.python_version }}"
          echo "python path : ${{ steps.setup-python.outputs.python-path }}"
          CPATH=${pythonLocation}/include/python${python_version}
          echo CPATH=${CPATH} >> "${GITHUB_ENV}"

      # See: https://github.com/actions/checkout/issues/363#issuecomment-1915075699
      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      # The source tag is pinned here; `inputs.ref` is intentionally left
      # commented out and does not select what gets built.
      - name: Checkout PyTorch
        uses: actions/checkout@v4
        with:
          repository: pytorch/pytorch
          # ref: ${{ inputs.ref }}
          ref: v2.6.0
          submodules: recursive
          path: pytorch

      # `| cat` keeps git from starting a pager in the job log.
      - name: View commit history
        working-directory: pytorch
        run: |
          git log -n 10 --graph | cat

      - name: Install torch dependencies
        working-directory: pytorch
        run: |
          pip install -r requirements.txt
          pip install cmake==3.31.6

      - name: Build torch
        working-directory: pytorch
        run: |
          python setup.py build bdist_wheel

      # Publishes the wheel's file name and absolute path as step outputs.
      # Assumes exactly one torch*.whl exists in pytorch/dist.
      - name: List distribution package
        id: list-dist
        working-directory: pytorch/dist
        run: |
          dist_name=$(ls torch*.whl)
          dist_path=$(pwd)/${dist_name}
          echo "dist-name=${dist_name}" >> "$GITHUB_OUTPUT"
          echo "dist-path=${dist_path}" >> "$GITHUB_OUTPUT"

      - name: Install torch
        working-directory: pytorch/dist
        run: |
          pip install "${{ steps.list-dist.outputs.dist-name }}"

      # Importing the freshly installed wheel doubles as a basic sanity check.
      - name: List torch version
        id: list-version
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          echo "torch version: ${torch_version}"
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"

      - name: Upload distribution artifact
        id: upload-dist
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.list-dist.outputs.dist-name }}
          path: ${{ steps.list-dist.outputs.dist-path }}
          if-no-files-found: error
          retention-days: 1
          overwrite: true

      # Only written when the upload step reported an artifact URL.
      - name: Write to workflow job summary
        if: ${{ steps.upload-dist.outputs.artifact-url }}
        run: |
          echo "## torch-${{ steps.list-version.outputs.torch-version }} built successfully! :rocket:" >> "$GITHUB_STEP_SUMMARY"
          echo "You can download the distribution package [here](${{ steps.upload-dist.outputs.artifact-url }})." >> "$GITHUB_STEP_SUMMARY"

      # Uses the GITHUB_WORKSPACE environment variable rather than the
      # `github.workspace` context: in a container job the context may hold
      # the host-side path, which does not exist inside the container.
      # `:?` aborts instead of expanding to `rm -rf /*` if the variable is
      # unset or empty.
      - name: Cleanup workspace
        if: always()
        run: rm -rf "${GITHUB_WORKSPACE:?}"/*
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
2+
name: _gaudi_hpu_benchmark

# Reusable workflow: it is only started through `workflow_call` from another
# workflow, which supplies the inputs and secret declared below.
on:
  workflow_call:
    inputs:
      runner:
        description: The runner selected to run on
        type: string
        required: true
      image:
        description: The docker image which will be loaded
        type: string
        required: true
      # NOTE(review): no step in this workflow reads `inputs.device`.
      device:
        description: The device selected to run on
        type: string
        required: true
      # Optional: when empty, the torch download/install steps are skipped.
      torch-artifact:
        description: The distribution artifact name of torch
        type: string
        required: false
      torch-hpu-artifact:
        description: The distribution artifact name of torch_hpu
        type: string
        required: true
      torch-hpu-dl-artifact:
        description: The distribution artifact name of torch_hpu dataloader
        type: string
        required: true
      # Must stay quoted: an unquoted 3.10 would be parsed as the float 3.1.
      python_version:
        description: The python version to use
        type: string
        required: false
        default: '3.10'
    secrets:
      # NOTE(review): required by the interface, but no step in this workflow
      # references `secrets.pr-token`.
      pr-token:
        description: A token used to create a pull request
        required: true
40+
41+
# Every `run` step goes through `bash -el {0}` unless it sets its own shell:
# a login shell (`-l`) that stops at the first failing command (`-e`). Plain
# bash steps do not read ~/.profile or ~/.bashrc, so the login shell is what
# gets the profile-provided environment activated.
# NOTE(review): the previous comment said this activates "ascend-toolkit"
# environment variables, which looks like a leftover from another backend's
# workflow; confirm which environment setup this workflow actually relies on.
defaults:
  run:
    shell: 'bash -el {0}'
47+
48+
jobs:
  # Installs torch, torch_hpu and the torchbench stack inside the requested
  # container, then produces and uploads a benchmark report file. The actual
  # benchmark run and the summary writer are still commented out, so the
  # report is currently an empty placeholder.
  test:
    name: run benchmarks for torch_hpu
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      volumes:
        # NOTE(review): /usr/local/dcmi looks like a leftover from another
        # backend's workflow; confirm it is needed on Gaudi hosts.
        - /usr/local/dcmi:/usr/local/dcmi
        - /usr/local/bin/hl-smi:/usr/local/bin/hl-smi
      env:
        HABANA_VISIBLE_DEVICES: all
        OMPI_MCA_btl_vader_single_copy_mechanism: none
      options: >-
        --ipc host
        --cap-add sys_nice
        --runtime habana
    steps:
      - name: Show HPU info
        run: |
          hl-smi info

      - name: Setup Python environment
        uses: actions/setup-python@v5
        id: setup-python
        with:
          python-version: ${{ inputs.python_version }}

      # Exports CPATH for all later steps so the compiler can find the Python
      # headers; `pythonLocation` is expected to be set by setup-python.
      - name: Setup CPATH
        run: |
          echo "python version : ${{ steps.setup-python.outputs.python-version }}"
          python_version="${{ inputs.python_version }}"
          echo "python path : ${{ steps.setup-python.outputs.python-path }}"
          CPATH=${pythonLocation}/include/python${python_version}
          echo CPATH=${CPATH} >> "${GITHUB_ENV}"

      # See: https://github.com/actions/checkout/issues/363#issuecomment-1915075699
      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout
        uses: actions/checkout@v4

      # The source tag is pinned here; this workflow declares no `ref` input.
      - name: Checkout PyTorch
        uses: actions/checkout@v4
        with:
          repository: pytorch/pytorch
          # ref: ${{ inputs.ref }}
          ref: v2.6.0
          submodules: recursive
          path: pytorch

      - name: Checkout torch_hpu
        uses: actions/checkout@v4
        with:
          repository: HabanaAI/gaudi-pytorch-bridge
          ref: v1.20.0
          path: torch_hpu

      - name: Install pip dependencies
        working-directory: torch_hpu
        run: |
          pip install -r requirements.txt

      # The torch artifact is optional: both steps are skipped when the caller
      # passes no `torch-artifact`.
      - name: Download torch artifact
        if: ${{ inputs.torch-artifact }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-artifact }}

      - name: Install torch
        if: ${{ inputs.torch-artifact }}
        run: |
          pip install "${{ inputs.torch-artifact }}"

      - name: Download torch_hpu artifact
        uses: actions/download-artifact@v4
        with:
          # name: ${{ inputs.torch-hpu-artifact }}
          name: torch_hpu_dists
          path: torch_hpu

      # NOTE(review): this assumes the downloaded artifact itself contains a
      # file named torch_hpu_dists.zip; verify against the workflow that
      # uploads `torch_hpu_dists`.
      - name: Install torch_hpu
        working-directory: torch_hpu
        run: |
          unzip torch_hpu_dists.zip && cd torch_hpu_dists
          pip install "${{ inputs.torch-hpu-artifact }}"
          pip install "${{ inputs.torch-hpu-dl-artifact }}"

      # Resolves the commits of torchbench, torchvision, torchaudio and
      # transformers that PyTorch pins for the installed torch build.
      # `curl -f` makes an HTTP error (e.g. a missing pin file) fail the step
      # instead of letting the error page text be used as a git ref later on.
      - name: List torch version
        id: list-torch-version
        shell: bash
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          torch_git_version=$(python -c "import torch; print(torch.version.git_version)")
          pins_url="https://raw.githubusercontent.com/pytorch/pytorch/${torch_git_version}"
          torchbench_version=$(curl -fsSL "${pins_url}/.github/ci_commit_pins/torchbench.txt")
          torchvision_version=$(curl -fsSL "${pins_url}/.github/ci_commit_pins/vision.txt")
          torchaudio_version=$(curl -fsSL "${pins_url}/.github/ci_commit_pins/audio.txt")
          hf_version=$(curl -fsSL "${pins_url}/.ci/docker/ci_commit_pins/huggingface.txt")
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"
          echo "torch-git-version=${torch_git_version}" >> "$GITHUB_OUTPUT"
          echo "torchbench-version=${torchbench_version}" >> "$GITHUB_OUTPUT"
          echo "torchvision-version=${torchvision_version}" >> "$GITHUB_OUTPUT"
          echo "torchaudio-version=${torchaudio_version}" >> "$GITHUB_OUTPUT"
          echo "hf-version=${hf_version}" >> "$GITHUB_OUTPUT"

      # Snapshot of the environment before the benchmark stack is installed.
      - name: Show environment info
        run: |
          export PT_HPU_LAZY_MODE=0
          hpu_is_available=$(python -c "import torch; print(torch.hpu.is_available())")
          hpu_count=$(python -c "import torch; print(torch.hpu.device_count())")
          echo "HPU is available: ${hpu_is_available}"
          echo "HPU count: ${hpu_count}"
          pip list | grep -E 'torch|numpy'

      - name: Checkout benchmark
        uses: actions/checkout@v4
        with:
          repository: pytorch/benchmark
          ref: ${{ steps.list-torch-version.outputs.torchbench-version }}
          path: benchmark

      - name: Checkout vision
        uses: actions/checkout@v4
        with:
          repository: pytorch/vision
          ref: ${{ steps.list-torch-version.outputs.torchvision-version }}
          path: vision

      - name: Checkout audio
        uses: actions/checkout@v4
        with:
          repository: pytorch/audio
          ref: ${{ steps.list-torch-version.outputs.torchaudio-version }}
          path: audio

      # torchvision and torchaudio are built from the pinned sources and
      # replace any preinstalled copies; transformers is installed from the
      # pinned commit.
      - name: Install torchvision torchaudio and transformers
        run: |
          cd vision && python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl && cd ..
          cd audio && python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl && cd ..
          pip install --force-reinstall git+https://github.com/huggingface/transformers@${{ steps.list-torch-version.outputs.hf-version }}

      - name: Install benchmark dependencies
        run: |
          pip install -r benchmark/requirements.txt

      - name: Install dependencies for all the models
        run: |
          python benchmark/install.py --userbenchmark test_bench --continue_on_fail

      - name: Install project dependencies
        run: |
          pip install -r requirements.txt

      # Second snapshot, taken after all installs, to expose any package
      # versions that the dependency installs changed.
      - name: Show environment info
        run: |
          export PT_HPU_LAZY_MODE=0
          hpu_is_available=$(python -c "import torch; print(torch.hpu.is_available())")
          hpu_count=$(python -c "import torch; print(torch.hpu.device_count())")
          echo "HPU is available: ${hpu_is_available}"
          echo "HPU count: ${hpu_count}"
          pip list | grep -E 'torch|numpy'

      # Placeholder: only creates an empty report so the upload step has a
      # file; the real benchmark invocation is still commented out.
      - name: Run benchmarks
        working-directory: benchmark
        run: |
          echo "Run torchbench"
          touch gaudi_hpu_benchmark.json
          # python run_benchmark.py test_bench --accuracy --device hpu --test eval \
          #   --output gaudi_hpu_benchmark.json

      - name: Upload the benchmark report file
        id: upload-report
        uses: actions/upload-artifact@v4
        with:
          name: gaudi_hpu_benchmark.json
          path: benchmark/gaudi_hpu_benchmark.json
          if-no-files-found: error
          retention-days: 1
          overwrite: true

      # Placeholder: the summary writer is still commented out.
      - name: Write to workflow job summary
        run: |
          echo "Write to workflow job summary"
          # python .ci/benchmark.py --write-gh-job-summary --path benchmark/gaudi_hpu_benchmark.json

      # Placeholder for the dashboard update; named separately so it no longer
      # duplicates the job-summary step's name in the run log.
      - name: Update test results dashboard
        run: |
          echo "TODO: update test results dashboard"

      # Uses the GITHUB_WORKSPACE environment variable rather than the
      # `github.workspace` context: in a container job the context may hold
      # the host-side path, which does not exist inside the container.
      # `:?` aborts instead of expanding to `rm -rf /*` if the variable is
      # unset or empty.
      - name: Cleanup workspace
        if: always()
        run: rm -rf "${GITHUB_WORKSPACE:?}"/*

0 commit comments

Comments
 (0)