@@ -465,6 +465,7 @@ define amdgpu_ps i32 @bcnt064(i64 inreg %val0) {
465465; CHECK: ; %bb.0:
466466; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
467467; CHECK-NEXT: s_mov_b32 s1, 0
468+ ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
468469; CHECK-NEXT: ;;#ASMSTART
469470; CHECK-NEXT: ; use s[0:1]
470471; CHECK-NEXT: ;;#ASMEND
@@ -682,19 +683,18 @@ define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspac
682683define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
683684; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
684685; CHECK: ; %bb.0:
685- ; CHECK-NEXT: s_bcnt1_i32_b32 s0, s0
686- ; CHECK-NEXT: s_sub_i32 s1, 32, s0
687- ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
688- ; CHECK-NEXT: ;;#ASMSTART
689- ; CHECK-NEXT: ; use s0
690- ; CHECK-NEXT: ;;#ASMEND
691- ; CHECK-NEXT: ;;#ASMSTART
692- ; CHECK-NEXT: ; use s1
693- ; CHECK-NEXT: ;;#ASMEND
694- ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
695- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
696- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
697- ; CHECK-NEXT: ; return to shader part epilog
686+ ; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
687+ ; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
688+ ; CHECK-NEXT: ;;#ASMSTART
689+ ; CHECK-NEXT: ; use s1
690+ ; CHECK-NEXT: ;;#ASMEND
691+ ; CHECK-NEXT: ;;#ASMSTART
692+ ; CHECK-NEXT: ; use s0
693+ ; CHECK-NEXT: ;;#ASMEND
694+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
695+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
696+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
697+ ; CHECK-NEXT: ; return to shader part epilog
698698 %result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
699699 %result2 = sub i32 32, %result
700700 call void asm "; use $0", "s"(i32 %result)
@@ -707,21 +707,21 @@ define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
707707define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
708708; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
709709; CHECK: ; %bb.0:
710- ; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
711- ; CHECK-NEXT: s_sub_u32 s2, 64, s0
712- ; CHECK-NEXT: s_subb_u32 s3, 0, 0
713- ; CHECK-NEXT: s_mov_b32 s1, 0
714- ; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
715- ; CHECK-NEXT: ;;#ASMSTART
716- ; CHECK-NEXT: ; use s[0:1]
717- ; CHECK-NEXT: ;;#ASMEND
718- ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
719- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
720- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
721- ; CHECK-NEXT: ;;#ASMSTART
722- ; CHECK-NEXT: ; use s[2:3]
723- ; CHECK-NEXT: ;;#ASMEND
724- ; CHECK-NEXT: ; return to shader part epilog
710+ ; CHECK-NEXT: s_mov_b32 s3, 0
711+ ; CHECK-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
712+ ; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
713+ ; CHECK-NEXT: s_mov_b32 s1, s3
714+ ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
715+ ; CHECK-NEXT: ;;#ASMSTART
716+ ; CHECK-NEXT: ; use s[0:1]
717+ ; CHECK-NEXT: ;;#ASMEND
718+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
719+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
720+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
721+ ; CHECK-NEXT: ;;#ASMSTART
722+ ; CHECK-NEXT: ; use s[2:3]
723+ ; CHECK-NEXT: ;;#ASMEND
724+ ; CHECK-NEXT: ; return to shader part epilog
725725 %result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
726726 %result2 = sub i64 64, %result
727727 call void asm "; use $0", "s"(i64 %result)
0 commit comments