@@ -116,9 +116,9 @@ false:
116116define  amdgpu_cs i32  @branch_uniform_ballot_ne_zero_non_compare (i32  inreg  %v ) {
117117; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: 
118118; CHECK:       ; %bb.0: 
119- ; CHECK-NEXT:    s_and_b32  s0, 1, s0  
120- ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0  
121- ; CHECK-NEXT:    s_cmp_eq_u64 s[0:1] , 0 
119+ ; CHECK-NEXT:    s_xor_b32  s0, s0, 1  
120+ ; CHECK-NEXT:    s_and_b32 s0, s0, 1  
121+ ; CHECK-NEXT:    s_cmp_lg_u32 s0 , 0 
122122; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2 
123123; CHECK-NEXT:  ; %bb.1: ; %true 
124124; CHECK-NEXT:    s_mov_b32 s0, 42 
@@ -164,16 +164,17 @@ false:
164164define  amdgpu_cs i32  @branch_uniform_ballot_eq_zero_non_compare (i32  inreg  %v ) {
165165; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: 
166166; CHECK:       ; %bb.0: 
167- ; CHECK-NEXT:    s_and_b32 s0, 1, s0 
168- ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 
169- ; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0 
170- ; CHECK-NEXT:    s_cbranch_scc0 .LBB10_2 
171- ; CHECK-NEXT:  ; %bb.1: ; %false 
172- ; CHECK-NEXT:    s_mov_b32 s0, 33 
173- ; CHECK-NEXT:    s_branch .LBB10_3 
174- ; CHECK-NEXT:  .LBB10_2: ; %true 
167+ ; CHECK-NEXT:    s_xor_b32 s0, s0, 1 
168+ ; CHECK-NEXT:    s_xor_b32 s0, s0, 1 
169+ ; CHECK-NEXT:    s_and_b32 s0, s0, 1 
170+ ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 
171+ ; CHECK-NEXT:    s_cbranch_scc1 .LBB10_2 
172+ ; CHECK-NEXT:  ; %bb.1: ; %true 
175173; CHECK-NEXT:    s_mov_b32 s0, 42 
176174; CHECK-NEXT:    s_branch .LBB10_3 
175+ ; CHECK-NEXT:  .LBB10_2: ; %false 
176+ ; CHECK-NEXT:    s_mov_b32 s0, 33 
177+ ; CHECK-NEXT:    s_branch .LBB10_3 
177178; CHECK-NEXT:  .LBB10_3: 
178179  %c  = trunc  i32  %v  to  i1 
179180  %ballot  = call  i64  @llvm.amdgcn.ballot.i64 (i1  %c )
@@ -211,11 +212,7 @@ false:
211212define  amdgpu_cs i32  @branch_uniform_ballot_ne_zero_compare (i32  inreg  %v ) {
212213; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: 
213214; CHECK:       ; %bb.0: 
214- ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 
215- ; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 
216- ; CHECK-NEXT:    s_and_b32 s0, 1, s0 
217- ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 
218- ; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0 
215+ ; CHECK-NEXT:    s_cmp_ge_u32 s0, 12 
219216; CHECK-NEXT:    s_cbranch_scc1 .LBB12_2 
220217; CHECK-NEXT:  ; %bb.1: ; %true 
221218; CHECK-NEXT:    s_mov_b32 s0, 42 
@@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
261258; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: 
262259; CHECK:       ; %bb.0: 
263260; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 
264- ; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 
265- ; CHECK-NEXT:    s_and_b32 s0, 1, s0 
266- ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 
267- ; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0 
268- ; CHECK-NEXT:    s_cbranch_scc0 .LBB14_2 
269- ; CHECK-NEXT:  ; %bb.1: ; %false 
270- ; CHECK-NEXT:    s_mov_b32 s0, 33 
271- ; CHECK-NEXT:    s_branch .LBB14_3 
272- ; CHECK-NEXT:  .LBB14_2: ; %true 
261+ ; CHECK-NEXT:    s_cbranch_scc1 .LBB14_2 
262+ ; CHECK-NEXT:  ; %bb.1: ; %true 
273263; CHECK-NEXT:    s_mov_b32 s0, 42 
274264; CHECK-NEXT:    s_branch .LBB14_3 
265+ ; CHECK-NEXT:  .LBB14_2: ; %false 
266+ ; CHECK-NEXT:    s_mov_b32 s0, 33 
267+ ; CHECK-NEXT:    s_branch .LBB14_3 
275268; CHECK-NEXT:  .LBB14_3: 
276269  %c  = icmp  ult  i32  %v , 12 
277270  %ballot  = call  i64  @llvm.amdgcn.ballot.i64 (i1  %c )
@@ -313,14 +306,12 @@ false:
313306define  amdgpu_cs i32  @branch_uniform_ballot_ne_zero_and (i32  inreg  %v1 , i32  inreg  %v2 ) {
314307; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: 
315308; CHECK:       ; %bb.0: 
316- ; CHECK-NEXT:    s_cmp_lt_u32  s0, 12 
309+ ; CHECK-NEXT:    s_cmp_ge_u32  s0, 12 
317310; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 
318- ; CHECK-NEXT:    s_cmp_gt_u32  s1, 34 
311+ ; CHECK-NEXT:    s_cmp_le_u32  s1, 34 
319312; CHECK-NEXT:    s_cselect_b32 s1, 1, 0 
320- ; CHECK-NEXT:    s_and_b32 s0, s0, s1 
321- ; CHECK-NEXT:    s_and_b32 s0, 1, s0 
322- ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 
323- ; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0 
313+ ; CHECK-NEXT:    s_or_b32 s0, s0, s1 
314+ ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 
324315; CHECK-NEXT:    s_cbranch_scc1 .LBB16_2 
325316; CHECK-NEXT:  ; %bb.1: ; %true 
326317; CHECK-NEXT:    s_mov_b32 s0, 42 
@@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
375366; CHECK-NEXT:    s_cmp_gt_u32 s1, 34 
376367; CHECK-NEXT:    s_cselect_b32 s1, 1, 0 
377368; CHECK-NEXT:    s_and_b32 s0, s0, s1 
378- ; CHECK-NEXT:    s_and_b32 s0, 1, s0 
379- ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 
380- ; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0 
381- ; CHECK-NEXT:    s_cbranch_scc0 .LBB18_2 
382- ; CHECK-NEXT:  ; %bb.1: ; %false 
383- ; CHECK-NEXT:    s_mov_b32 s0, 33 
384- ; CHECK-NEXT:    s_branch .LBB18_3 
385- ; CHECK-NEXT:  .LBB18_2: ; %true 
369+ ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 
370+ ; CHECK-NEXT:    s_cbranch_scc1 .LBB18_2 
371+ ; CHECK-NEXT:  ; %bb.1: ; %true 
386372; CHECK-NEXT:    s_mov_b32 s0, 42 
387373; CHECK-NEXT:    s_branch .LBB18_3 
374+ ; CHECK-NEXT:  .LBB18_2: ; %false 
375+ ; CHECK-NEXT:    s_mov_b32 s0, 33 
376+ ; CHECK-NEXT:    s_branch .LBB18_3 
388377; CHECK-NEXT:  .LBB18_3: 
389378  %v1c  = icmp  ult  i32  %v1 , 12 
390379  %v2c  = icmp  ugt  i32  %v2 , 34 
0 commit comments