16
16
runner : ${{ fromJson(inputs.matrix) }}
17
17
include :
18
18
- image : rocm/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
19
+ runner : ["self-hosted", "gfx90a"]
20
+ # Cache save/restore is on the host machine at directory /home/runner/.triton, while in the docker
21
+ # container it is expected at /github/home/.triton. So map it here to make sure it is visible in docker.
22
+ options : >-
23
+ --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
24
+ --volume /home/runner/.triton:/github/home/.triton
25
+ - image : rocm/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
26
+ runner : ["amd-gfx942"]
27
+ # We add --env-file to pull in the HIP_VISIBLE_DEVICES and ROCR_VISIBLE_DEVICES definitions for GPU isolation.
28
+ options : >-
29
+ --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
30
+ --env-file /etc/podinfo/gha-gpu-isolation-settings
31
+ --volume /home/runner/.triton:/github/home/.triton
19
32
- image : rocm/7.0-preview:rocm7.0_preview_ubuntu22.04_llama2_70b_training_mlperf_mi35X_prealpha
20
33
runner : ["amd-gfx950"]
34
+ options : >-
35
+ --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
36
+ --env-file /etc/podinfo/gha-gpu-isolation-settings
37
+ --volume /home/runner/.triton:/github/home/.triton
21
38
env :
22
39
RUNNER_TYPE : ${{ matrix.runner[1] }}
23
40
TRITON_BUILD_WITH_CCACHE : " true"
29
46
CCACHE_COMPRESS : " true"
30
47
container :
31
48
image : ${{ matrix.image }}
32
- # Cache save/restore is on the host machine at directory /home/runner/.triton, while in the docker
33
- # container expect it at /github/home/.triton. So map here to make sure visible in docker.
34
- options : >-
35
- --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
36
- --volume /home/runner/.triton:/github/home/.triton
49
+ options : ${{ matrix.options }}
37
50
steps :
38
51
- name : Checkout
39
52
uses : actions/checkout@v4
94
107
run : ccache --print-stats
95
108
- name : Run lit tests
96
109
run : make test-lit
110
+ - name : Run C++ unittests
111
+ run : make test-cpp
97
112
- name : Run python tests on AMD
98
113
run : |
99
114
INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
@@ -145,13 +160,13 @@ jobs:
145
160
python3 -m pytest -s -n 8 ./test_cast_matmul.py
146
161
- name : Run Proton tests
147
162
run : |
163
+ unset HIP_VISIBLE_DEVICES
164
+ unset ROCR_VISIBLE_DEVICES
148
165
if [ "${{ matrix.runner[0] }}" = "amd-gfx950" ]; then
149
166
python3 -m pytest -s -n 8 third_party/proton/test -k "not test_instrument_exec"
150
167
else
151
168
make test-proton
152
169
fi
153
- - name : Run C++ unittests
154
- run : make test-cpp
155
170
- name : Inspect cache directories
156
171
run : |
157
172
mkdir -p ~/.triton
@@ -160,7 +175,8 @@ jobs:
160
175
mkdir -p ~/.ccache
161
176
du -h -d 1 ~/.ccache
162
177
- name : Clean up caches
163
- # Always cleanup the worker, even if builds or tests failed
178
+ # Always clean up the worker, even if builds or tests failed, given that these directories are
179
+ # mapped from the host and we write files as the root user in the docker container.
164
180
if : always()
165
181
run : |
166
182
rm -rf ~/.triton/cache
0 commit comments