1
1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2
2
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s
3
+ # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX12 %s
4
+
5
+ # For gfx12+, this test simply ensures that we don't crash in the face of manually
6
+ # inserted waitcnt intrinsics. They are still allowed for compatibility, but
7
+ # their effect in the hardware is very conservative, and code generation does not attempt
8
+ # to do anything with them. Developers who write code at such a low level should
9
+ # revisit their code for gfx12+ anyway.
3
10
4
11
---
5
12
name : test_waitcnt_preexisting_lgkmcnt_unmodified
@@ -17,6 +24,22 @@ body: |
17
24
; GFX9-NEXT: S_WAITCNT 112
18
25
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
19
26
; GFX9-NEXT: S_ENDPGM 0
27
+ ;
28
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
29
+ ; GFX12: liveins: $vgpr0
30
+ ; GFX12-NEXT: {{ $}}
31
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
32
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
33
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
34
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
35
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
36
+ ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
37
+ ; GFX12-NEXT: S_WAITCNT 49279
38
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
39
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
40
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
41
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
42
+ ; GFX12-NEXT: S_ENDPGM 0
20
43
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
21
44
S_WAITCNT 49279
22
45
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -40,6 +63,22 @@ body: |
40
63
; GFX9-NEXT: S_WAITCNT 112
41
64
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
42
65
; GFX9-NEXT: S_ENDPGM 0
66
+ ;
67
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
68
+ ; GFX12: liveins: $vgpr0_vgpr1
69
+ ; GFX12-NEXT: {{ $}}
70
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
71
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
72
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
73
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
74
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
75
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
76
+ ; GFX12-NEXT: S_WAITCNT 3952
77
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
78
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
79
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
80
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
81
+ ; GFX12-NEXT: S_ENDPGM 0
43
82
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
44
83
S_WAITCNT 3952
45
84
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -65,6 +104,22 @@ body: |
65
104
; GFX9-NEXT: S_WAITCNT 112
66
105
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
67
106
; GFX9-NEXT: S_ENDPGM 0
107
+ ;
108
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
109
+ ; GFX12: liveins: $vgpr0
110
+ ; GFX12-NEXT: {{ $}}
111
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
112
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
113
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
114
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
115
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
116
+ ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
117
+ ; GFX12-NEXT: S_WAITCNT 3952
118
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
119
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
120
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
121
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
122
+ ; GFX12-NEXT: S_ENDPGM 0
68
123
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
69
124
S_WAITCNT 3952
70
125
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -88,6 +143,22 @@ body: |
88
143
; GFX9-NEXT: S_WAITCNT 112
89
144
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
90
145
; GFX9-NEXT: S_ENDPGM 0
146
+ ;
147
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
148
+ ; GFX12: liveins: $vgpr0_vgpr1
149
+ ; GFX12-NEXT: {{ $}}
150
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
151
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
152
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
153
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
154
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
155
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
156
+ ; GFX12-NEXT: S_WAITCNT 49279
157
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
158
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
159
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
160
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
161
+ ; GFX12-NEXT: S_ENDPGM 0
91
162
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
92
163
S_WAITCNT 49279
93
164
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -115,6 +186,24 @@ body: |
115
186
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
116
187
; GFX9-NEXT: S_WAITCNT 112
117
188
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
189
+ ;
190
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_apply_all_counters
191
+ ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2
192
+ ; GFX12-NEXT: {{ $}}
193
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
194
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
195
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
196
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
197
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
198
+ ; GFX12-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
199
+ ; GFX12-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
200
+ ; GFX12-NEXT: S_WAITCNT 0
201
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
202
+ ; GFX12-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
203
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
204
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
205
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
206
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
118
207
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
119
208
$vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
120
209
S_WAITCNT 0
@@ -136,6 +225,24 @@ body: |
136
225
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
137
226
; GFX9-NEXT: S_WAITCNT 0
138
227
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
228
+ ;
229
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
230
+ ; GFX12: liveins: $vgpr0_vgpr1
231
+ ; GFX12-NEXT: {{ $}}
232
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
233
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
234
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
235
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
236
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
237
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
238
+ ; GFX12-NEXT: S_WAITCNT 0
239
+ ; GFX12-NEXT: S_WAITCNT 0
240
+ ; GFX12-NEXT: S_WAITCNT 0
241
+ ; GFX12-NEXT: S_WAITCNT 0
242
+ ; GFX12-NEXT: S_WAITCNT 0
243
+ ; GFX12-NEXT: S_WAITCNT 0
244
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
245
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
139
246
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
140
247
S_WAITCNT 0
141
248
S_WAITCNT 0
@@ -159,6 +266,20 @@ body: |
159
266
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
160
267
; GFX9-NEXT: S_WAITCNT 112
161
268
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
269
+ ;
270
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
271
+ ; GFX12: liveins: $vgpr0_vgpr1
272
+ ; GFX12-NEXT: {{ $}}
273
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
274
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
275
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
276
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
277
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
278
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
279
+ ; GFX12-NEXT: S_WAITCNT 49279
280
+ ; GFX12-NEXT: S_WAITCNT 3952
281
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
282
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
162
283
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
163
284
S_WAITCNT 49279
164
285
S_WAITCNT 3952
@@ -185,6 +306,23 @@ body: |
185
306
; GFX9-NEXT: S_NOP 0
186
307
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
187
308
; GFX9-NEXT: S_ENDPGM 0
309
+ ;
310
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_early_wait
311
+ ; GFX12: liveins: $vgpr0_vgpr1
312
+ ; GFX12-NEXT: {{ $}}
313
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
314
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
315
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
316
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
317
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
318
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
319
+ ; GFX12-NEXT: S_WAITCNT 0
320
+ ; GFX12-NEXT: S_NOP 0
321
+ ; GFX12-NEXT: S_NOP 0
322
+ ; GFX12-NEXT: S_NOP 0
323
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
324
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
325
+ ; GFX12-NEXT: S_ENDPGM 0
188
326
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
189
327
S_WAITCNT 0
190
328
S_NOP 0
@@ -207,6 +345,18 @@ body: |
207
345
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
208
346
; GFX9-NEXT: S_WAITCNT 3952
209
347
; GFX9-NEXT: KILL $vgpr0
348
+ ;
349
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_ignore_kill
350
+ ; GFX12: liveins: $vgpr0_vgpr1
351
+ ; GFX12-NEXT: {{ $}}
352
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
353
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
354
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
355
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
356
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
357
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
358
+ ; GFX12-NEXT: S_WAITCNT 3952
359
+ ; GFX12-NEXT: KILL $vgpr0
210
360
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
211
361
S_WAITCNT 3952
212
362
KILL $vgpr0
@@ -221,6 +371,15 @@ body: |
221
371
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
222
372
; GFX9: S_WAITCNT 0
223
373
; GFX9-NEXT: S_ENDPGM 0
374
+ ;
375
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_func_start
376
+ ; GFX12: S_WAIT_LOADCNT_DSCNT 0
377
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
378
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
379
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
380
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
381
+ ; GFX12-NEXT: S_WAITCNT 0
382
+ ; GFX12-NEXT: S_ENDPGM 0
224
383
S_WAITCNT 0
225
384
S_ENDPGM 0
226
385
...
@@ -241,6 +400,22 @@ body: |
241
400
; GFX9-NEXT: S_WAITCNT 112
242
401
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
243
402
; GFX9-NEXT: S_ENDPGM 0
403
+ ;
404
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_buffer_inv
405
+ ; GFX12: S_WAIT_LOADCNT_DSCNT 0
406
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
407
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
408
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
409
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
410
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
411
+ ; GFX12-NEXT: S_WAITCNT 3952
412
+ ; GFX12-NEXT: BUFFER_INVL2 implicit $exec
413
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
414
+ ; GFX12-NEXT: BUFFER_WBINVL1_VOL implicit $exec
415
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
416
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
417
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
418
+ ; GFX12-NEXT: S_ENDPGM 0
244
419
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
245
420
S_WAITCNT 3952
246
421
BUFFER_INVL2 implicit $exec
0 commit comments