Skip to content

Commit ec1f28d

Browse files
authored
AMDGPU/gfx12: avoid crashing on legacy waitcnt intrinsics (llvm#92306)
The legacy waitcnt intrinsics *are* still accepted by the HW on gfx12, but they have a conservative effect. Leave them untouched, since handling them would complicate the logic a bit, and developers who code at such a low level really need to revisit what they're doing anyway.
1 parent ee765b0 commit ec1f28d

File tree

2 files changed

+180
-0
lines changed

2 files changed

+180
-0
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,6 +1364,11 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
13641364
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
13651365
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
13661366

1367+
// Don't crash if the programmer used legacy waitcnt intrinsics, but don't
1368+
// attempt to do more than that either.
1369+
if (Opcode == AMDGPU::S_WAITCNT)
1370+
continue;
1371+
13671372
if (Opcode == AMDGPU::S_WAIT_LOADCNT_DSCNT) {
13681373
unsigned OldEnc =
13691374
TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();

llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX12 %s
4+
5+
# For gfx12+, this test simply ensures that we don't crash in the face of manually
6+
# inserted waitcnt intrinsics. They are still allowed for compatibility, but
7+
# their effect in the HW is very conservative and code generation does not attempt
8+
# to do anything with them. Developers who write code at such a low level should
9+
# revisit their code for gfx12+ anyway.
310

411
---
512
name: test_waitcnt_preexisting_lgkmcnt_unmodified
@@ -17,6 +24,22 @@ body: |
1724
; GFX9-NEXT: S_WAITCNT 112
1825
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
1926
; GFX9-NEXT: S_ENDPGM 0
27+
;
28+
; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
29+
; GFX12: liveins: $vgpr0
30+
; GFX12-NEXT: {{ $}}
31+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
32+
; GFX12-NEXT: S_WAIT_EXPCNT 0
33+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
34+
; GFX12-NEXT: S_WAIT_BVHCNT 0
35+
; GFX12-NEXT: S_WAIT_KMCNT 0
36+
; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
37+
; GFX12-NEXT: S_WAITCNT 49279
38+
; GFX12-NEXT: S_WAIT_DSCNT 0
39+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
40+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
41+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
42+
; GFX12-NEXT: S_ENDPGM 0
2043
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
2144
S_WAITCNT 49279
2245
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -40,6 +63,22 @@ body: |
4063
; GFX9-NEXT: S_WAITCNT 112
4164
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
4265
; GFX9-NEXT: S_ENDPGM 0
66+
;
67+
; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
68+
; GFX12: liveins: $vgpr0_vgpr1
69+
; GFX12-NEXT: {{ $}}
70+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
71+
; GFX12-NEXT: S_WAIT_EXPCNT 0
72+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
73+
; GFX12-NEXT: S_WAIT_BVHCNT 0
74+
; GFX12-NEXT: S_WAIT_KMCNT 0
75+
; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
76+
; GFX12-NEXT: S_WAITCNT 3952
77+
; GFX12-NEXT: S_WAIT_LOADCNT 0
78+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
79+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
80+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
81+
; GFX12-NEXT: S_ENDPGM 0
4382
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
4483
S_WAITCNT 3952
4584
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -65,6 +104,22 @@ body: |
65104
; GFX9-NEXT: S_WAITCNT 112
66105
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
67106
; GFX9-NEXT: S_ENDPGM 0
107+
;
108+
; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
109+
; GFX12: liveins: $vgpr0
110+
; GFX12-NEXT: {{ $}}
111+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
112+
; GFX12-NEXT: S_WAIT_EXPCNT 0
113+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
114+
; GFX12-NEXT: S_WAIT_BVHCNT 0
115+
; GFX12-NEXT: S_WAIT_KMCNT 0
116+
; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
117+
; GFX12-NEXT: S_WAITCNT 3952
118+
; GFX12-NEXT: S_WAIT_DSCNT 0
119+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
120+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
121+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
122+
; GFX12-NEXT: S_ENDPGM 0
68123
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
69124
S_WAITCNT 3952
70125
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -88,6 +143,22 @@ body: |
88143
; GFX9-NEXT: S_WAITCNT 112
89144
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
90145
; GFX9-NEXT: S_ENDPGM 0
146+
;
147+
; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
148+
; GFX12: liveins: $vgpr0_vgpr1
149+
; GFX12-NEXT: {{ $}}
150+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
151+
; GFX12-NEXT: S_WAIT_EXPCNT 0
152+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
153+
; GFX12-NEXT: S_WAIT_BVHCNT 0
154+
; GFX12-NEXT: S_WAIT_KMCNT 0
155+
; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
156+
; GFX12-NEXT: S_WAITCNT 49279
157+
; GFX12-NEXT: S_WAIT_LOADCNT 0
158+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
159+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
160+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
161+
; GFX12-NEXT: S_ENDPGM 0
91162
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
92163
S_WAITCNT 49279
93164
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -115,6 +186,24 @@ body: |
115186
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
116187
; GFX9-NEXT: S_WAITCNT 112
117188
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
189+
;
190+
; GFX12-LABEL: name: test_waitcnt_preexisting_apply_all_counters
191+
; GFX12: liveins: $vgpr0_vgpr1, $vgpr2
192+
; GFX12-NEXT: {{ $}}
193+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
194+
; GFX12-NEXT: S_WAIT_EXPCNT 0
195+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
196+
; GFX12-NEXT: S_WAIT_BVHCNT 0
197+
; GFX12-NEXT: S_WAIT_KMCNT 0
198+
; GFX12-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
199+
; GFX12-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
200+
; GFX12-NEXT: S_WAITCNT 0
201+
; GFX12-NEXT: S_WAIT_DSCNT 0
202+
; GFX12-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
203+
; GFX12-NEXT: S_WAIT_LOADCNT 0
204+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
205+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
206+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
118207
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
119208
$vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
120209
S_WAITCNT 0
@@ -136,6 +225,24 @@ body: |
136225
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
137226
; GFX9-NEXT: S_WAITCNT 0
138227
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
228+
;
229+
; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
230+
; GFX12: liveins: $vgpr0_vgpr1
231+
; GFX12-NEXT: {{ $}}
232+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
233+
; GFX12-NEXT: S_WAIT_EXPCNT 0
234+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
235+
; GFX12-NEXT: S_WAIT_BVHCNT 0
236+
; GFX12-NEXT: S_WAIT_KMCNT 0
237+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
238+
; GFX12-NEXT: S_WAITCNT 0
239+
; GFX12-NEXT: S_WAITCNT 0
240+
; GFX12-NEXT: S_WAITCNT 0
241+
; GFX12-NEXT: S_WAITCNT 0
242+
; GFX12-NEXT: S_WAITCNT 0
243+
; GFX12-NEXT: S_WAITCNT 0
244+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
245+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
139246
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
140247
S_WAITCNT 0
141248
S_WAITCNT 0
@@ -159,6 +266,20 @@ body: |
159266
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
160267
; GFX9-NEXT: S_WAITCNT 112
161268
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
269+
;
270+
; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
271+
; GFX12: liveins: $vgpr0_vgpr1
272+
; GFX12-NEXT: {{ $}}
273+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
274+
; GFX12-NEXT: S_WAIT_EXPCNT 0
275+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
276+
; GFX12-NEXT: S_WAIT_BVHCNT 0
277+
; GFX12-NEXT: S_WAIT_KMCNT 0
278+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
279+
; GFX12-NEXT: S_WAITCNT 49279
280+
; GFX12-NEXT: S_WAITCNT 3952
281+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
282+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
162283
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
163284
S_WAITCNT 49279
164285
S_WAITCNT 3952
@@ -185,6 +306,23 @@ body: |
185306
; GFX9-NEXT: S_NOP 0
186307
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
187308
; GFX9-NEXT: S_ENDPGM 0
309+
;
310+
; GFX12-LABEL: name: test_waitcnt_preexisting_early_wait
311+
; GFX12: liveins: $vgpr0_vgpr1
312+
; GFX12-NEXT: {{ $}}
313+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
314+
; GFX12-NEXT: S_WAIT_EXPCNT 0
315+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
316+
; GFX12-NEXT: S_WAIT_BVHCNT 0
317+
; GFX12-NEXT: S_WAIT_KMCNT 0
318+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
319+
; GFX12-NEXT: S_WAITCNT 0
320+
; GFX12-NEXT: S_NOP 0
321+
; GFX12-NEXT: S_NOP 0
322+
; GFX12-NEXT: S_NOP 0
323+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
324+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
325+
; GFX12-NEXT: S_ENDPGM 0
188326
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
189327
S_WAITCNT 0
190328
S_NOP 0
@@ -207,6 +345,18 @@ body: |
207345
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
208346
; GFX9-NEXT: S_WAITCNT 3952
209347
; GFX9-NEXT: KILL $vgpr0
348+
;
349+
; GFX12-LABEL: name: test_waitcnt_preexisting_ignore_kill
350+
; GFX12: liveins: $vgpr0_vgpr1
351+
; GFX12-NEXT: {{ $}}
352+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
353+
; GFX12-NEXT: S_WAIT_EXPCNT 0
354+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
355+
; GFX12-NEXT: S_WAIT_BVHCNT 0
356+
; GFX12-NEXT: S_WAIT_KMCNT 0
357+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
358+
; GFX12-NEXT: S_WAITCNT 3952
359+
; GFX12-NEXT: KILL $vgpr0
210360
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
211361
S_WAITCNT 3952
212362
KILL $vgpr0
@@ -221,6 +371,15 @@ body: |
221371
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
222372
; GFX9: S_WAITCNT 0
223373
; GFX9-NEXT: S_ENDPGM 0
374+
;
375+
; GFX12-LABEL: name: test_waitcnt_preexisting_func_start
376+
; GFX12: S_WAIT_LOADCNT_DSCNT 0
377+
; GFX12-NEXT: S_WAIT_EXPCNT 0
378+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
379+
; GFX12-NEXT: S_WAIT_BVHCNT 0
380+
; GFX12-NEXT: S_WAIT_KMCNT 0
381+
; GFX12-NEXT: S_WAITCNT 0
382+
; GFX12-NEXT: S_ENDPGM 0
224383
S_WAITCNT 0
225384
S_ENDPGM 0
226385
...
@@ -241,6 +400,22 @@ body: |
241400
; GFX9-NEXT: S_WAITCNT 112
242401
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
243402
; GFX9-NEXT: S_ENDPGM 0
403+
;
404+
; GFX12-LABEL: name: test_waitcnt_preexisting_buffer_inv
405+
; GFX12: S_WAIT_LOADCNT_DSCNT 0
406+
; GFX12-NEXT: S_WAIT_EXPCNT 0
407+
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
408+
; GFX12-NEXT: S_WAIT_BVHCNT 0
409+
; GFX12-NEXT: S_WAIT_KMCNT 0
410+
; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
411+
; GFX12-NEXT: S_WAITCNT 3952
412+
; GFX12-NEXT: BUFFER_INVL2 implicit $exec
413+
; GFX12-NEXT: S_WAIT_LOADCNT 0
414+
; GFX12-NEXT: BUFFER_WBINVL1_VOL implicit $exec
415+
; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
416+
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
417+
; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
418+
; GFX12-NEXT: S_ENDPGM 0
244419
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
245420
S_WAITCNT 3952
246421
BUFFER_INVL2 implicit $exec

0 commit comments

Comments
 (0)