@@ -116,9 +116,9 @@ false:
116116define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare (i32 inreg %v ) {
117117; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
118118; CHECK: ; %bb.0:
119- ; CHECK-NEXT: s_xor_b32 s0, s0, 1
120- ; CHECK-NEXT: s_and_b32 s0, s0, 1
121- ; CHECK-NEXT: s_cmp_lg_u32 s0 , 0
119+ ; CHECK-NEXT: s_and_b32 s0, 1, s0
120+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
121+ ; CHECK-NEXT: s_cmp_eq_u64 s[0:1] , 0
122122; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
123123; CHECK-NEXT: ; %bb.1: ; %true
124124; CHECK-NEXT: s_mov_b32 s0, 42
@@ -164,17 +164,16 @@ false:
164164define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare (i32 inreg %v ) {
165165; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
166166; CHECK: ; %bb.0:
167- ; CHECK-NEXT: s_xor_b32 s0, s0, 1
168- ; CHECK-NEXT: s_xor_b32 s0, s0, 1
169- ; CHECK-NEXT: s_and_b32 s0, s0, 1
170- ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
171- ; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
172- ; CHECK-NEXT: ; %bb.1: ; %true
173- ; CHECK-NEXT: s_mov_b32 s0, 42
174- ; CHECK-NEXT: s_branch .LBB10_3
175- ; CHECK-NEXT: .LBB10_2: ; %false
167+ ; CHECK-NEXT: s_and_b32 s0, 1, s0
168+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
169+ ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
170+ ; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
171+ ; CHECK-NEXT: ; %bb.1: ; %false
176172; CHECK-NEXT: s_mov_b32 s0, 33
177173; CHECK-NEXT: s_branch .LBB10_3
174+ ; CHECK-NEXT: .LBB10_2: ; %true
175+ ; CHECK-NEXT: s_mov_b32 s0, 42
176+ ; CHECK-NEXT: s_branch .LBB10_3
178177; CHECK-NEXT: .LBB10_3:
179178 %c = trunc i32 %v to i1
180179 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
@@ -212,7 +211,11 @@ false:
212211define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare (i32 inreg %v ) {
213212; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
214213; CHECK: ; %bb.0:
215- ; CHECK-NEXT: s_cmp_ge_u32 s0, 12
214+ ; CHECK-NEXT: s_cmp_lt_u32 s0, 12
215+ ; CHECK-NEXT: s_cselect_b32 s0, 1, 0
216+ ; CHECK-NEXT: s_and_b32 s0, 1, s0
217+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
218+ ; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
216219; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
217220; CHECK-NEXT: ; %bb.1: ; %true
218221; CHECK-NEXT: s_mov_b32 s0, 42
@@ -258,13 +261,17 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
258261; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
259262; CHECK: ; %bb.0:
260263; CHECK-NEXT: s_cmp_lt_u32 s0, 12
261- ; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
262- ; CHECK-NEXT: ; %bb.1: ; %true
263- ; CHECK-NEXT: s_mov_b32 s0, 42
264- ; CHECK-NEXT: s_branch .LBB14_3
265- ; CHECK-NEXT: .LBB14_2: ; %false
264+ ; CHECK-NEXT: s_cselect_b32 s0, 1, 0
265+ ; CHECK-NEXT: s_and_b32 s0, 1, s0
266+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
267+ ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
268+ ; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
269+ ; CHECK-NEXT: ; %bb.1: ; %false
266270; CHECK-NEXT: s_mov_b32 s0, 33
267271; CHECK-NEXT: s_branch .LBB14_3
272+ ; CHECK-NEXT: .LBB14_2: ; %true
273+ ; CHECK-NEXT: s_mov_b32 s0, 42
274+ ; CHECK-NEXT: s_branch .LBB14_3
268275; CHECK-NEXT: .LBB14_3:
269276 %c = icmp ult i32 %v , 12
270277 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
@@ -306,12 +313,14 @@ false:
306313define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and (i32 inreg %v1 , i32 inreg %v2 ) {
307314; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
308315; CHECK: ; %bb.0:
309- ; CHECK-NEXT: s_cmp_ge_u32 s0, 12
316+ ; CHECK-NEXT: s_cmp_lt_u32 s0, 12
310317; CHECK-NEXT: s_cselect_b32 s0, 1, 0
311- ; CHECK-NEXT: s_cmp_le_u32 s1, 34
318+ ; CHECK-NEXT: s_cmp_gt_u32 s1, 34
312319; CHECK-NEXT: s_cselect_b32 s1, 1, 0
313- ; CHECK-NEXT: s_or_b32 s0, s0, s1
314- ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
320+ ; CHECK-NEXT: s_and_b32 s0, s0, s1
321+ ; CHECK-NEXT: s_and_b32 s0, 1, s0
322+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
323+ ; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
315324; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
316325; CHECK-NEXT: ; %bb.1: ; %true
317326; CHECK-NEXT: s_mov_b32 s0, 42
@@ -366,14 +375,16 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
366375; CHECK-NEXT: s_cmp_gt_u32 s1, 34
367376; CHECK-NEXT: s_cselect_b32 s1, 1, 0
368377; CHECK-NEXT: s_and_b32 s0, s0, s1
369- ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
370- ; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
371- ; CHECK-NEXT: ; %bb.1: ; %true
372- ; CHECK-NEXT: s_mov_b32 s0, 42
373- ; CHECK-NEXT: s_branch .LBB18_3
374- ; CHECK-NEXT: .LBB18_2: ; %false
378+ ; CHECK-NEXT: s_and_b32 s0, 1, s0
379+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
380+ ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
381+ ; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
382+ ; CHECK-NEXT: ; %bb.1: ; %false
375383; CHECK-NEXT: s_mov_b32 s0, 33
376384; CHECK-NEXT: s_branch .LBB18_3
385+ ; CHECK-NEXT: .LBB18_2: ; %true
386+ ; CHECK-NEXT: s_mov_b32 s0, 42
387+ ; CHECK-NEXT: s_branch .LBB18_3
377388; CHECK-NEXT: .LBB18_3:
378389 %v1c = icmp ult i32 %v1 , 12
379390 %v2c = icmp ugt i32 %v2 , 34
0 commit comments