Skip to content

Commit 878b06d

Browse files
authored
[CI] workaround MI300 node failure (#717)
* update workflow This is a combination of 3 commits. update install upstream rm scripts
* disable upstream
* try rocm label
* use latest
* test MI300 node
* remove upstream install code
1 parent c824160 commit 878b06d

File tree

1 file changed

+15
-41
lines changed

1 file changed

+15
-41
lines changed

.github/workflows/amd_perf_kernel_Integration_tests.yml

Lines changed: 15 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,10 @@ on:
44
workflow_dispatch:
55
pull_request:
66
branches: [main_perf]
7-
merge_group:
8-
branches: [main_perf]
9-
types: [checks_requested]
107

118
concurrency:
129
group: ${{ github.ref }}
13-
cancel-in-progress: ${{ github.ref != 'refs/heads/main_perf' }}
10+
cancel-in-progress: true
1411

1512
permissions: read-all
1613

@@ -39,24 +36,8 @@ jobs:
3936
exit 1
4037
fi
4138
42-
Runner-Preparation-AMD:
43-
runs-on: ubuntu-latest
44-
timeout-minutes: 30
45-
outputs:
46-
matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
47-
steps:
48-
- name: Prepare runner matrix
49-
id: set-matrix
50-
run: |
51-
if [ x"${{ github.repository }}" == x"ROCm/triton" ]; then
52-
echo '::set-output name=matrix-HIP::[["self-hosted", "gfx942"]]'
53-
else
54-
echo '::set-output name=matrix-HIP::[["ubuntu-latest"]]'
55-
fi
56-
5739
pre-commit:
5840
name: pre-commit (code formatting)
59-
needs: Runner-Preparation-AMD
6041
runs-on: ubuntu-latest
6142
steps:
6243
- name: Checkout
@@ -92,21 +73,20 @@ jobs:
9273
git diff
9374
9475
Integration-Tests-AMD:
95-
needs: Runner-Preparation-AMD
96-
if: needs.Runner-Preparation-AMD.outputs.matrix-HIP != ''
9776
runs-on: ${{ matrix.runner }}
98-
timeout-minutes: 90
9977
strategy:
10078
matrix:
101-
runner: ${{ fromJson(needs.Runner-Preparation-AMD.outputs.matrix-HIP) }}
102-
triton_version: [local, upstream]
79+
runner: [gfx942]
10380
fail-fast: false # disables failing the entire job when one matrix entry fails
10481
container:
10582
image: rocm/pytorch:latest
10683
options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
10784
steps:
10885
- name: Checkout
10986
uses: actions/checkout@v4
87+
- name: Show Device Info
88+
run: |
89+
rocminfo | grep gfx
11090
- name: Clear cache
11191
run: |
11292
rm -rf ~/.triton
@@ -119,23 +99,17 @@ jobs:
11999
run: |
120100
python3 -m pip install --upgrade pip
121101
python3 -m pip install lit matplotlib pandas
122-
- name: Install Triton (local or upstream)
102+
- name: Uninstall Triton
103+
run : |
104+
pip uninstall -y triton
105+
rm -rf ~/.triton
106+
rm -rf ./triton/python/build
107+
- name: Install Triton
123108
run: |
124-
if [ "${{ matrix.triton_version }}" = "local" ]; then
125-
echo "Installing Local Triton..."
126-
pip uninstall -y triton
127-
cd python
128-
pip install -v -e .
129-
pip install numpy==1.26.4
130-
else
131-
echo "Installing Upstream Triton..."
132-
pip uninstall -y triton
133-
pip install matplotlib pandas pytest
134-
git clone https://github.com/triton-lang/triton
135-
cd triton
136-
pip install --verbose -e python
137-
cd ..
138-
fi
109+
pip uninstall -y triton
110+
cd python
111+
pip install -v -e .
112+
pip install numpy==1.26.4
139113
- name: Show Triton version
140114
run: |
141115
pip show triton

0 commit comments

Comments
 (0)