@@ -165,16 +165,17 @@ false:
165165define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare (i32 inreg %v ) {
166166; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
167167; CHECK: ; %bb.0:
168- ; CHECK-NEXT: s_and_b32 s0, 1, s0
169- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
170- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
171- ; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
172- ; CHECK-NEXT: ; %bb.1: ; %false
173- ; CHECK-NEXT: s_mov_b32 s0, 33
174- ; CHECK-NEXT: s_branch .LBB10_3
175- ; CHECK-NEXT: .LBB10_2: ; %true
168+ ; CHECK-NEXT: s_xor_b32 s0, s0, 1
169+ ; CHECK-NEXT: s_xor_b32 s0, s0, 1
170+ ; CHECK-NEXT: s_and_b32 s0, s0, 1
171+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
172+ ; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
173+ ; CHECK-NEXT: ; %bb.1: ; %true
176174; CHECK-NEXT: s_mov_b32 s0, 42
177175; CHECK-NEXT: s_branch .LBB10_3
176+ ; CHECK-NEXT: .LBB10_2: ; %false
177+ ; CHECK-NEXT: s_mov_b32 s0, 33
178+ ; CHECK-NEXT: s_branch .LBB10_3
178179; CHECK-NEXT: .LBB10_3:
179180 %c = trunc i32 %v to i1
180181 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
@@ -262,17 +263,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
262263; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
263264; CHECK: ; %bb.0:
264265; CHECK-NEXT: s_cmp_lt_u32 s0, 12
265- ; CHECK-NEXT: s_cselect_b32 s0, 1, 0
266- ; CHECK-NEXT: s_and_b32 s0, 1, s0
267- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
268- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
269- ; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
270- ; CHECK-NEXT: ; %bb.1: ; %false
271- ; CHECK-NEXT: s_mov_b32 s0, 33
272- ; CHECK-NEXT: s_branch .LBB14_3
273- ; CHECK-NEXT: .LBB14_2: ; %true
266+ ; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
267+ ; CHECK-NEXT: ; %bb.1: ; %true
274268; CHECK-NEXT: s_mov_b32 s0, 42
275269; CHECK-NEXT: s_branch .LBB14_3
270+ ; CHECK-NEXT: .LBB14_2: ; %false
271+ ; CHECK-NEXT: s_mov_b32 s0, 33
272+ ; CHECK-NEXT: s_branch .LBB14_3
276273; CHECK-NEXT: .LBB14_3:
277274 %c = icmp ult i32 %v , 12
278275 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
@@ -377,16 +374,15 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
377374; CHECK-NEXT: s_cmp_gt_u32 s1, 34
378375; CHECK-NEXT: s_cselect_b32 s1, 1, 0
379376; CHECK-NEXT: s_and_b32 s0, s0, s1
380- ; CHECK-NEXT: s_and_b32 s0, 1, s0
381- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
382- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
383- ; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
384- ; CHECK-NEXT: ; %bb.1: ; %false
385- ; CHECK-NEXT: s_mov_b32 s0, 33
386- ; CHECK-NEXT: s_branch .LBB18_3
387- ; CHECK-NEXT: .LBB18_2: ; %true
377+ ; CHECK-NEXT: s_and_b32 s0, s0, 1
378+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
379+ ; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
380+ ; CHECK-NEXT: ; %bb.1: ; %true
388381; CHECK-NEXT: s_mov_b32 s0, 42
389382; CHECK-NEXT: s_branch .LBB18_3
383+ ; CHECK-NEXT: .LBB18_2: ; %false
384+ ; CHECK-NEXT: s_mov_b32 s0, 33
385+ ; CHECK-NEXT: s_branch .LBB18_3
390386; CHECK-NEXT: .LBB18_3:
391387 %v1c = icmp ult i32 %v1 , 12
392388 %v2c = icmp ugt i32 %v2 , 34
0 commit comments