@@ -116,9 +116,9 @@ false:
116116define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare (i32 inreg %v ) {
117117; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
118118; CHECK: ; %bb.0:
119- ; CHECK-NEXT: s_and_b32 s0, 1, s0
120- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
121- ; CHECK-NEXT: s_cmp_eq_u64 s[0:1] , 0
119+ ; CHECK-NEXT: s_xor_b32 s0, s0, 1
120+ ; CHECK-NEXT: s_and_b32 s0, s0, 1
121+ ; CHECK-NEXT: s_cmp_lg_u32 s0 , 0
122122; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
123123; CHECK-NEXT: ; %bb.1: ; %true
124124; CHECK-NEXT: s_mov_b32 s0, 42
@@ -164,16 +164,17 @@ false:
164164define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare (i32 inreg %v ) {
165165; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
166166; CHECK: ; %bb.0:
167- ; CHECK-NEXT: s_and_b32 s0, 1, s0
168- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
169- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
170- ; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
171- ; CHECK-NEXT: ; %bb.1: ; %false
172- ; CHECK-NEXT: s_mov_b32 s0, 33
173- ; CHECK-NEXT: s_branch .LBB10_3
174- ; CHECK-NEXT: .LBB10_2: ; %true
167+ ; CHECK-NEXT: s_xor_b32 s0, s0, 1
168+ ; CHECK-NEXT: s_xor_b32 s0, s0, 1
169+ ; CHECK-NEXT: s_and_b32 s0, s0, 1
170+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
171+ ; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
172+ ; CHECK-NEXT: ; %bb.1: ; %true
175173; CHECK-NEXT: s_mov_b32 s0, 42
176174; CHECK-NEXT: s_branch .LBB10_3
175+ ; CHECK-NEXT: .LBB10_2: ; %false
176+ ; CHECK-NEXT: s_mov_b32 s0, 33
177+ ; CHECK-NEXT: s_branch .LBB10_3
177178; CHECK-NEXT: .LBB10_3:
178179 %c = trunc i32 %v to i1
179180 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
@@ -211,11 +212,7 @@ false:
211212define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare (i32 inreg %v ) {
212213; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
213214; CHECK: ; %bb.0:
214- ; CHECK-NEXT: s_cmp_lt_u32 s0, 12
215- ; CHECK-NEXT: s_cselect_b32 s0, 1, 0
216- ; CHECK-NEXT: s_and_b32 s0, 1, s0
217- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
218- ; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
215+ ; CHECK-NEXT: s_cmp_ge_u32 s0, 12
219216; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
220217; CHECK-NEXT: ; %bb.1: ; %true
221218; CHECK-NEXT: s_mov_b32 s0, 42
@@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
261258; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
262259; CHECK: ; %bb.0:
263260; CHECK-NEXT: s_cmp_lt_u32 s0, 12
264- ; CHECK-NEXT: s_cselect_b32 s0, 1, 0
265- ; CHECK-NEXT: s_and_b32 s0, 1, s0
266- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
267- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
268- ; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
269- ; CHECK-NEXT: ; %bb.1: ; %false
270- ; CHECK-NEXT: s_mov_b32 s0, 33
271- ; CHECK-NEXT: s_branch .LBB14_3
272- ; CHECK-NEXT: .LBB14_2: ; %true
261+ ; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
262+ ; CHECK-NEXT: ; %bb.1: ; %true
273263; CHECK-NEXT: s_mov_b32 s0, 42
274264; CHECK-NEXT: s_branch .LBB14_3
265+ ; CHECK-NEXT: .LBB14_2: ; %false
266+ ; CHECK-NEXT: s_mov_b32 s0, 33
267+ ; CHECK-NEXT: s_branch .LBB14_3
275268; CHECK-NEXT: .LBB14_3:
276269 %c = icmp ult i32 %v , 12
277270 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
@@ -313,14 +306,12 @@ false:
313306define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and (i32 inreg %v1 , i32 inreg %v2 ) {
314307; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
315308; CHECK: ; %bb.0:
316- ; CHECK-NEXT: s_cmp_lt_u32 s0, 12
309+ ; CHECK-NEXT: s_cmp_ge_u32 s0, 12
317310; CHECK-NEXT: s_cselect_b32 s0, 1, 0
318- ; CHECK-NEXT: s_cmp_gt_u32 s1, 34
311+ ; CHECK-NEXT: s_cmp_le_u32 s1, 34
319312; CHECK-NEXT: s_cselect_b32 s1, 1, 0
320- ; CHECK-NEXT: s_and_b32 s0, s0, s1
321- ; CHECK-NEXT: s_and_b32 s0, 1, s0
322- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
323- ; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
313+ ; CHECK-NEXT: s_or_b32 s0, s0, s1
314+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
324315; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
325316; CHECK-NEXT: ; %bb.1: ; %true
326317; CHECK-NEXT: s_mov_b32 s0, 42
@@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
375366; CHECK-NEXT: s_cmp_gt_u32 s1, 34
376367; CHECK-NEXT: s_cselect_b32 s1, 1, 0
377368; CHECK-NEXT: s_and_b32 s0, s0, s1
378- ; CHECK-NEXT: s_and_b32 s0, 1, s0
379- ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
380- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
381- ; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
382- ; CHECK-NEXT: ; %bb.1: ; %false
383- ; CHECK-NEXT: s_mov_b32 s0, 33
384- ; CHECK-NEXT: s_branch .LBB18_3
385- ; CHECK-NEXT: .LBB18_2: ; %true
369+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
370+ ; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
371+ ; CHECK-NEXT: ; %bb.1: ; %true
386372; CHECK-NEXT: s_mov_b32 s0, 42
387373; CHECK-NEXT: s_branch .LBB18_3
374+ ; CHECK-NEXT: .LBB18_2: ; %false
375+ ; CHECK-NEXT: s_mov_b32 s0, 33
376+ ; CHECK-NEXT: s_branch .LBB18_3
388377; CHECK-NEXT: .LBB18_3:
389378 %v1c = icmp ult i32 %v1 , 12
390379 %v2c = icmp ugt i32 %v2 , 34
0 commit comments