Skip to content

Commit 9567d51

Browse files
committed
Rebase, handle barrier patch
1 parent c6180d9 commit 9567d51

11 files changed

+731
-424
lines changed

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2202,7 +2202,8 @@ bool SIGfx10CacheControl::insertBarrierStart(
22022202
// mode. This is because a CU mode release fence does not emit any wait, which
22032203
// is fine when only dealing with vmem, but isn't sufficient in the presence
22042204
// of barriers which do not go through vmem.
2205-
if (!ST.isCuModeEnabled())
2205+
// GFX12.5 does not require this additional wait.
2206+
if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts())
22062207
return false;
22072208

22082209
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),

llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll

Lines changed: 69 additions & 39 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
3838
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
3939
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
4040
; GFX1250-NEXT: s_wait_storecnt 0x0
41+
; GFX1250-NEXT: s_wait_xcnt 0x0
4142
; GFX1250-NEXT: s_wait_kmcnt 0x0
4243
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
4344
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -79,6 +80,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
7980
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
8081
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
8182
; GFX1250-NEXT: s_wait_storecnt 0x0
83+
; GFX1250-NEXT: s_wait_xcnt 0x0
8284
; GFX1250-NEXT: s_wait_kmcnt 0x0
8385
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
8486
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -189,6 +191,7 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
189191
; GFX1250: ; %bb.0:
190192
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
191193
; GFX1250-NEXT: s_wait_kmcnt 0x0
194+
; GFX1250-NEXT: s_wait_storecnt 0x0
192195
; GFX1250-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
193196
; GFX1250-NEXT: s_wait_dscnt 0x0
194197
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -255,6 +258,7 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
255258
; GFX1250: ; %bb.0:
256259
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
257260
; GFX1250-NEXT: s_wait_kmcnt 0x0
261+
; GFX1250-NEXT: s_wait_storecnt 0x0
258262
; GFX1250-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
259263
; GFX1250-NEXT: s_wait_dscnt 0x0
260264
; GFX1250-NEXT: s_set_pc_i64 s[30:31]

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef
130130
; GFX1250: ; %bb.0: ; %entry
131131
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
132132
; GFX1250-NEXT: s_wait_kmcnt 0x0
133+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
133134
; GFX1250-NEXT: s_wait_storecnt 0x0
134135
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
135136
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -144,6 +145,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef
144145
; GFX1250: ; %bb.0: ; %entry
145146
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
146147
; GFX1250-NEXT: s_wait_kmcnt 0x0
148+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
147149
; GFX1250-NEXT: s_wait_storecnt 0x0
148150
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
149151
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -158,6 +160,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef
158160
; GFX1250: ; %bb.0: ; %entry
159161
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
160162
; GFX1250-NEXT: s_wait_kmcnt 0x0
163+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
161164
; GFX1250-NEXT: s_wait_storecnt 0x0
162165
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
163166
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -220,6 +223,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef
220223
; GFX1250: ; %bb.0: ; %entry
221224
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
222225
; GFX1250-NEXT: s_wait_kmcnt 0x0
226+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
223227
; GFX1250-NEXT: s_wait_storecnt 0x0
224228
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
225229
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -234,6 +238,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef
234238
; GFX1250: ; %bb.0: ; %entry
235239
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
236240
; GFX1250-NEXT: s_wait_kmcnt 0x0
241+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
237242
; GFX1250-NEXT: s_wait_storecnt 0x0
238243
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
239244
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -248,6 +253,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef
248253
; GFX1250: ; %bb.0: ; %entry
249254
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
250255
; GFX1250-NEXT: s_wait_kmcnt 0x0
256+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
251257
; GFX1250-NEXT: s_wait_storecnt 0x0
252258
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
253259
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -385,6 +391,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr
385391
; GFX1250: ; %bb.0: ; %entry
386392
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
387393
; GFX1250-NEXT: s_wait_kmcnt 0x0
394+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
388395
; GFX1250-NEXT: s_wait_storecnt 0x0
389396
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
390397
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -399,6 +406,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr
399406
; GFX1250: ; %bb.0: ; %entry
400407
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
401408
; GFX1250-NEXT: s_wait_kmcnt 0x0
409+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
402410
; GFX1250-NEXT: s_wait_storecnt 0x0
403411
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
404412
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -413,6 +421,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr
413421
; GFX1250: ; %bb.0: ; %entry
414422
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
415423
; GFX1250-NEXT: s_wait_kmcnt 0x0
424+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
416425
; GFX1250-NEXT: s_wait_storecnt 0x0
417426
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
418427
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -475,6 +484,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr
475484
; GFX1250: ; %bb.0: ; %entry
476485
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
477486
; GFX1250-NEXT: s_wait_kmcnt 0x0
487+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
478488
; GFX1250-NEXT: s_wait_storecnt 0x0
479489
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
480490
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -489,6 +499,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr
489499
; GFX1250: ; %bb.0: ; %entry
490500
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
491501
; GFX1250-NEXT: s_wait_kmcnt 0x0
502+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
492503
; GFX1250-NEXT: s_wait_storecnt 0x0
493504
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
494505
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -503,6 +514,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr
503514
; GFX1250: ; %bb.0: ; %entry
504515
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
505516
; GFX1250-NEXT: s_wait_kmcnt 0x0
517+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
506518
; GFX1250-NEXT: s_wait_storecnt 0x0
507519
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
508520
; GFX1250-NEXT: s_wait_dscnt 0x0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef
124124
; GFX1250: ; %bb.0: ; %entry
125125
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
126126
; GFX1250-NEXT: s_wait_kmcnt 0x0
127+
; GFX1250-NEXT: s_wait_storecnt 0x0
127128
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
128129
; GFX1250-NEXT: s_wait_dscnt 0x0
129130
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -137,6 +138,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef
137138
; GFX1250: ; %bb.0: ; %entry
138139
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
139140
; GFX1250-NEXT: s_wait_kmcnt 0x0
141+
; GFX1250-NEXT: s_wait_storecnt 0x0
140142
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
141143
; GFX1250-NEXT: s_wait_dscnt 0x0
142144
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -150,6 +152,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef
150152
; GFX1250: ; %bb.0: ; %entry
151153
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
152154
; GFX1250-NEXT: s_wait_kmcnt 0x0
155+
; GFX1250-NEXT: s_wait_storecnt 0x0
153156
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
154157
; GFX1250-NEXT: s_wait_dscnt 0x0
155158
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -163,6 +166,7 @@ define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef r
163166
; GFX1250: ; %bb.0: ; %entry
164167
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
165168
; GFX1250-NEXT: s_wait_kmcnt 0x0
169+
; GFX1250-NEXT: s_wait_storecnt 0x0
166170
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
167171
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
168172
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -176,6 +180,7 @@ define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr nou
176180
; GFX1250: ; %bb.0: ; %entry
177181
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
178182
; GFX1250-NEXT: s_wait_kmcnt 0x0
183+
; GFX1250-NEXT: s_wait_storecnt 0x0
179184
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
180185
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
181186
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -189,6 +194,7 @@ define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr nou
189194
; GFX1250: ; %bb.0: ; %entry
190195
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
191196
; GFX1250-NEXT: s_wait_kmcnt 0x0
197+
; GFX1250-NEXT: s_wait_storecnt 0x0
192198
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
193199
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
194200
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -202,6 +208,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef
202208
; GFX1250: ; %bb.0: ; %entry
203209
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
204210
; GFX1250-NEXT: s_wait_kmcnt 0x0
211+
; GFX1250-NEXT: s_wait_storecnt 0x0
205212
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
206213
; GFX1250-NEXT: s_wait_dscnt 0x0
207214
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -215,6 +222,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef
215222
; GFX1250: ; %bb.0: ; %entry
216223
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
217224
; GFX1250-NEXT: s_wait_kmcnt 0x0
225+
; GFX1250-NEXT: s_wait_storecnt 0x0
218226
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
219227
; GFX1250-NEXT: s_wait_dscnt 0x0
220228
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -228,6 +236,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef
228236
; GFX1250: ; %bb.0: ; %entry
229237
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
230238
; GFX1250-NEXT: s_wait_kmcnt 0x0
239+
; GFX1250-NEXT: s_wait_storecnt 0x0
231240
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
232241
; GFX1250-NEXT: s_wait_dscnt 0x0
233242
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -358,6 +367,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr
358367
; GFX1250: ; %bb.0: ; %entry
359368
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
360369
; GFX1250-NEXT: s_wait_kmcnt 0x0
370+
; GFX1250-NEXT: s_wait_storecnt 0x0
361371
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
362372
; GFX1250-NEXT: s_wait_dscnt 0x0
363373
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -371,6 +381,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr
371381
; GFX1250: ; %bb.0: ; %entry
372382
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
373383
; GFX1250-NEXT: s_wait_kmcnt 0x0
384+
; GFX1250-NEXT: s_wait_storecnt 0x0
374385
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
375386
; GFX1250-NEXT: s_wait_dscnt 0x0
376387
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -384,6 +395,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr
384395
; GFX1250: ; %bb.0: ; %entry
385396
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
386397
; GFX1250-NEXT: s_wait_kmcnt 0x0
398+
; GFX1250-NEXT: s_wait_storecnt 0x0
387399
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
388400
; GFX1250-NEXT: s_wait_dscnt 0x0
389401
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -397,6 +409,7 @@ define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr no
397409
; GFX1250: ; %bb.0: ; %entry
398410
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
399411
; GFX1250-NEXT: s_wait_kmcnt 0x0
412+
; GFX1250-NEXT: s_wait_storecnt 0x0
400413
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
401414
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
402415
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -410,6 +423,7 @@ define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(
410423
; GFX1250: ; %bb.0: ; %entry
411424
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
412425
; GFX1250-NEXT: s_wait_kmcnt 0x0
426+
; GFX1250-NEXT: s_wait_storecnt 0x0
413427
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
414428
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
415429
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -423,6 +437,7 @@ define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(
423437
; GFX1250: ; %bb.0: ; %entry
424438
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
425439
; GFX1250-NEXT: s_wait_kmcnt 0x0
440+
; GFX1250-NEXT: s_wait_storecnt 0x0
426441
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
427442
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
428443
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -436,6 +451,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr
436451
; GFX1250: ; %bb.0: ; %entry
437452
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
438453
; GFX1250-NEXT: s_wait_kmcnt 0x0
454+
; GFX1250-NEXT: s_wait_storecnt 0x0
439455
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
440456
; GFX1250-NEXT: s_wait_dscnt 0x0
441457
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -449,6 +465,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr
449465
; GFX1250: ; %bb.0: ; %entry
450466
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
451467
; GFX1250-NEXT: s_wait_kmcnt 0x0
468+
; GFX1250-NEXT: s_wait_storecnt 0x0
452469
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
453470
; GFX1250-NEXT: s_wait_dscnt 0x0
454471
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -462,6 +479,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr
462479
; GFX1250: ; %bb.0: ; %entry
463480
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
464481
; GFX1250-NEXT: s_wait_kmcnt 0x0
482+
; GFX1250-NEXT: s_wait_storecnt 0x0
465483
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
466484
; GFX1250-NEXT: s_wait_dscnt 0x0
467485
; GFX1250-NEXT: s_set_pc_i64 s[30:31]

llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,6 @@ define amdgpu_kernel void @test_s_barrier() {
4545
;
4646
; GFX1250-LABEL: test_s_barrier:
4747
; GFX1250: ; %bb.0: ; %entry
48-
; GFX1250-NEXT: s_wait_alu 0xffe3
4948
; GFX1250-NEXT: s_barrier_signal -1
5049
; GFX1250-NEXT: s_barrier_wait -1
5150
; GFX1250-NEXT: s_endpgm
@@ -103,8 +102,8 @@ define amdgpu_kernel void @test_s_barrier_workgroup_fence() {
103102
;
104103
; GFX1250-LABEL: test_s_barrier_workgroup_fence:
105104
; GFX1250: ; %bb.0: ; %entry
106-
; GFX1250-NEXT: s_wait_dscnt 0x0
107-
; GFX1250-NEXT: s_wait_alu 0xffe3
105+
; GFX1250-NEXT: s_wait_storecnt 0x0
106+
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
108107
; GFX1250-NEXT: s_barrier_signal -1
109108
; GFX1250-NEXT: s_barrier_wait -1
110109
; GFX1250-NEXT: s_endpgm
@@ -168,11 +167,9 @@ define amdgpu_kernel void @test_s_barrier_agent_fence() {
168167
;
169168
; GFX1250-LABEL: test_s_barrier_agent_fence:
170169
; GFX1250: ; %bb.0: ; %entry
171-
; GFX1250-NEXT: s_wait_bvhcnt 0x0
172-
; GFX1250-NEXT: s_wait_samplecnt 0x0
170+
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
173171
; GFX1250-NEXT: s_wait_storecnt 0x0
174172
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
175-
; GFX1250-NEXT: s_wait_alu 0xffe3
176173
; GFX1250-NEXT: s_barrier_signal -1
177174
; GFX1250-NEXT: s_barrier_wait -1
178175
; GFX1250-NEXT: s_endpgm

0 commit comments

Comments
 (0)