@@ -41,13 +41,12 @@ define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg) {
4141; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
4242; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4343; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
44- ; GCN-NEXT: s_cbranch_execz .LBB0_2
4544; GCN-NEXT: ; %bb.1:
4645; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
4746; GCN-NEXT: v_mov_b32_e32 v0, 0
4847; GCN-NEXT: v_mov_b32_e32 v1, s4
4948; GCN-NEXT: buffer_atomic_add v1, v0, s[0:3], 0 idxen
50- ; GCN-NEXT: .LBB0_2 :
49+ ; GCN-NEXT: ; %bb.2 :
5150; GCN-NEXT: s_endpgm
5251.entry:
5352 call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32 (i32 1 , <4 x i32 > %arg , i32 0 , i32 0 , i32 0 , i32 0 )
@@ -87,13 +86,12 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
8786; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
8887; GCN-NEXT: ; implicit-def: $vgpr1
8988; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
90- ; GCN-NEXT: s_cbranch_execz .LBB1_2
9189; GCN-NEXT: ; %bb.1:
9290; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
9391; GCN-NEXT: v_mov_b32_e32 v1, s6
9492; GCN-NEXT: v_mov_b32_e32 v2, 0
9593; GCN-NEXT: buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
96- ; GCN-NEXT: .LBB1_2 :
94+ ; GCN-NEXT: ; %bb.2 :
9795; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
9896; GCN-NEXT: s_waitcnt vmcnt(0)
9997; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -139,13 +137,12 @@ define amdgpu_cs void @atomic_sub(<4 x i32> inreg %arg) {
139137; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
140138; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
141139; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
142- ; GCN-NEXT: s_cbranch_execz .LBB2_2
143140; GCN-NEXT: ; %bb.1:
144141; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
145142; GCN-NEXT: v_mov_b32_e32 v0, 0
146143; GCN-NEXT: v_mov_b32_e32 v1, s4
147144; GCN-NEXT: buffer_atomic_sub v1, v0, s[0:3], 0 idxen
148- ; GCN-NEXT: .LBB2_2 :
145+ ; GCN-NEXT: ; %bb.2 :
149146; GCN-NEXT: s_endpgm
150147.entry:
151148 call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32 (i32 1 , <4 x i32 > %arg , i32 0 , i32 0 , i32 0 , i32 0 )
@@ -185,13 +182,12 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
185182; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
186183; GCN-NEXT: ; implicit-def: $vgpr1
187184; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
188- ; GCN-NEXT: s_cbranch_execz .LBB3_2
189185; GCN-NEXT: ; %bb.1:
190186; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
191187; GCN-NEXT: v_mov_b32_e32 v1, s6
192188; GCN-NEXT: v_mov_b32_e32 v2, 0
193189; GCN-NEXT: buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
194- ; GCN-NEXT: .LBB3_2 :
190+ ; GCN-NEXT: ; %bb.2 :
195191; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
196192; GCN-NEXT: s_waitcnt vmcnt(0)
197193; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -238,14 +234,13 @@ define amdgpu_cs void @atomic_xor(<4 x i32> inreg %arg) {
238234; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
239235; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
240236; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
241- ; GCN-NEXT: s_cbranch_execz .LBB4_2
242237; GCN-NEXT: ; %bb.1:
243238; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
244239; GCN-NEXT: s_and_b32 s4, s4, 1
245240; GCN-NEXT: v_mov_b32_e32 v0, 0
246241; GCN-NEXT: v_mov_b32_e32 v1, s4
247242; GCN-NEXT: buffer_atomic_xor v1, v0, s[0:3], 0 idxen
248- ; GCN-NEXT: .LBB4_2 :
243+ ; GCN-NEXT: ; %bb.2 :
249244; GCN-NEXT: s_endpgm
250245.entry:
251246 call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32 (i32 1 , <4 x i32 > %arg , i32 0 , i32 0 , i32 0 , i32 0 )
@@ -287,14 +282,13 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
287282; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
288283; GCN-NEXT: ; implicit-def: $vgpr1
289284; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
290- ; GCN-NEXT: s_cbranch_execz .LBB5_2
291285; GCN-NEXT: ; %bb.1:
292286; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
293287; GCN-NEXT: s_and_b32 s6, s6, 1
294288; GCN-NEXT: v_mov_b32_e32 v1, s6
295289; GCN-NEXT: v_mov_b32_e32 v2, 0
296290; GCN-NEXT: buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
297- ; GCN-NEXT: .LBB5_2 :
291+ ; GCN-NEXT: ; %bb.2 :
298292; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
299293; GCN-NEXT: s_waitcnt vmcnt(0)
300294; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -341,13 +335,12 @@ define amdgpu_cs void @atomic_ptr_add(ptr addrspace(8) inreg %arg) {
341335; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
342336; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
343337; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
344- ; GCN-NEXT: s_cbranch_execz .LBB6_2
345338; GCN-NEXT: ; %bb.1:
346339; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
347340; GCN-NEXT: v_mov_b32_e32 v0, 0
348341; GCN-NEXT: v_mov_b32_e32 v1, s4
349342; GCN-NEXT: buffer_atomic_add v1, v0, s[0:3], 0 idxen
350- ; GCN-NEXT: .LBB6_2 :
343+ ; GCN-NEXT: ; %bb.2 :
351344; GCN-NEXT: s_endpgm
352345.entry:
353346 call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32 (i32 1 , ptr addrspace (8 ) %arg , i32 0 , i32 0 , i32 0 , i32 0 )
@@ -389,13 +382,12 @@ define amdgpu_cs void @atomic_ptr_add_and_format(ptr addrspace(8) inreg %arg) {
389382; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
390383; GCN-NEXT: ; implicit-def: $vgpr1
391384; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
392- ; GCN-NEXT: s_cbranch_execz .LBB7_2
393385; GCN-NEXT: ; %bb.1:
394386; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
395387; GCN-NEXT: v_mov_b32_e32 v1, s6
396388; GCN-NEXT: v_mov_b32_e32 v2, 0
397389; GCN-NEXT: buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
398- ; GCN-NEXT: .LBB7_2 :
390+ ; GCN-NEXT: ; %bb.2 :
399391; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
400392; GCN-NEXT: s_waitcnt vmcnt(0)
401393; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -443,13 +435,12 @@ define amdgpu_cs void @atomic_ptr_sub(ptr addrspace(8) inreg %arg) {
443435; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
444436; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
445437; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
446- ; GCN-NEXT: s_cbranch_execz .LBB8_2
447438; GCN-NEXT: ; %bb.1:
448439; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
449440; GCN-NEXT: v_mov_b32_e32 v0, 0
450441; GCN-NEXT: v_mov_b32_e32 v1, s4
451442; GCN-NEXT: buffer_atomic_sub v1, v0, s[0:3], 0 idxen
452- ; GCN-NEXT: .LBB8_2 :
443+ ; GCN-NEXT: ; %bb.2 :
453444; GCN-NEXT: s_endpgm
454445.entry:
455446 call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32 (i32 1 , ptr addrspace (8 ) %arg , i32 0 , i32 0 , i32 0 , i32 0 )
@@ -491,13 +482,12 @@ define amdgpu_cs void @atomic_ptr_sub_and_format(ptr addrspace(8) inreg %arg) {
491482; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
492483; GCN-NEXT: ; implicit-def: $vgpr1
493484; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
494- ; GCN-NEXT: s_cbranch_execz .LBB9_2
495485; GCN-NEXT: ; %bb.1:
496486; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
497487; GCN-NEXT: v_mov_b32_e32 v1, s6
498488; GCN-NEXT: v_mov_b32_e32 v2, 0
499489; GCN-NEXT: buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
500- ; GCN-NEXT: .LBB9_2 :
490+ ; GCN-NEXT: ; %bb.2 :
501491; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
502492; GCN-NEXT: s_waitcnt vmcnt(0)
503493; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -546,14 +536,13 @@ define amdgpu_cs void @atomic_ptr_xor(ptr addrspace(8) inreg %arg) {
546536; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
547537; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
548538; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
549- ; GCN-NEXT: s_cbranch_execz .LBB10_2
550539; GCN-NEXT: ; %bb.1:
551540; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
552541; GCN-NEXT: s_and_b32 s4, s4, 1
553542; GCN-NEXT: v_mov_b32_e32 v0, 0
554543; GCN-NEXT: v_mov_b32_e32 v1, s4
555544; GCN-NEXT: buffer_atomic_xor v1, v0, s[0:3], 0 idxen
556- ; GCN-NEXT: .LBB10_2 :
545+ ; GCN-NEXT: ; %bb.2 :
557546; GCN-NEXT: s_endpgm
558547.entry:
559548 call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32 (i32 1 , ptr addrspace (8 ) %arg , i32 0 , i32 0 , i32 0 , i32 0 )
@@ -597,14 +586,13 @@ define amdgpu_cs void @atomic_ptr_xor_and_format(ptr addrspace(8) inreg %arg) {
597586; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
598587; GCN-NEXT: ; implicit-def: $vgpr1
599588; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
600- ; GCN-NEXT: s_cbranch_execz .LBB11_2
601589; GCN-NEXT: ; %bb.1:
602590; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
603591; GCN-NEXT: s_and_b32 s6, s6, 1
604592; GCN-NEXT: v_mov_b32_e32 v1, s6
605593; GCN-NEXT: v_mov_b32_e32 v2, 0
606594; GCN-NEXT: buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
607- ; GCN-NEXT: .LBB11_2 :
595+ ; GCN-NEXT: ; %bb.2 :
608596; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
609597; GCN-NEXT: s_waitcnt vmcnt(0)
610598; GCN-NEXT: v_readfirstlane_b32 s4, v1
0 commit comments