@@ -621,3 +621,112 @@ if:
621621endif:
622622 ret i32 1
623623}
624+
625+ define amdgpu_ps void @bcnt032_not_for_vregs (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) { ; i32 ctpop of a loaded value; the expected ISA below uses v_bcnt_u32_b32 / v_sub_u32_e32 on VGPRs
626+ ; CHECK-LABEL: bcnt032_not_for_vregs:
627+ ; CHECK: ; %bb.0:
628+ ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
629+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
630+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
631+ ; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
632+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
633+ ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
634+ ; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
635+ ; CHECK-NEXT: ;;#ASMSTART
636+ ; CHECK-NEXT: ; use v3
637+ ; CHECK-NEXT: ;;#ASMEND
638+ ; CHECK-NEXT: global_store_dword v[0:1], v2, off
639+ ; CHECK-NEXT: s_endpgm
640+ %tid = call i32 @llvm.amdgcn.workitem.id.x () ; index used below to address into %in
641+ %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i32 %tid ; address of the %tid-th i32 element of %in
642+ %val0 = load volatile i32 , ptr addrspace (1 ) %gep ; volatile load of the ctpop operand
643+ %result = call i32 @llvm.ctpop.i32 (i32 %val0 ) nounwind readnone ; population count of the loaded word
644+ %result2 = sub i32 32 , %result ; 32 - popcount; consumed by the inline asm (printed as v3 in the expected output) and by %cmp
645+ call void asm "; use $0" , "s" (i32 %result2 )
646+ %cmp = icmp ne i32 %result2 , 0 ; (32 - popcount) != 0
647+ %zext = zext i1 %cmp to i32 ; %zext has no users in this function
648+ store i32 %result , ptr addrspace (1 ) %out ; the popcount itself, not %result2, is what gets stored
649+ ret void
650+ }
651+
652+ define amdgpu_ps void @bcnt064_not_for_vregs (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) { ; i64 ctpop of a loaded value; the expected ISA below uses two chained v_bcnt_u32_b32 and a VALU 64-bit subtract
653+ ; CHECK-LABEL: bcnt064_not_for_vregs:
654+ ; CHECK: ; %bb.0:
655+ ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
656+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
657+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
658+ ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
659+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
660+ ; CHECK-NEXT: v_mov_b32_e32 v4, 0
661+ ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
662+ ; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
663+ ; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
664+ ; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
665+ ; CHECK-NEXT: ;;#ASMSTART
666+ ; CHECK-NEXT: ; use v[5:6]
667+ ; CHECK-NEXT: ;;#ASMEND
668+ ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
669+ ; CHECK-NEXT: s_endpgm
670+ %tid = call i32 @llvm.amdgcn.workitem.id.x () ; index used below to address into %in
671+ %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i32 %tid ; NOTE(review): element type is i32 although the load below is i64 - confirm this is intended
672+ %val0 = load volatile i64 , ptr addrspace (1 ) %gep ; volatile load of the 64-bit ctpop operand
673+ %result = call i64 @llvm.ctpop.i64 (i64 %val0 ) nounwind readnone ; population count of the loaded 64-bit value
674+ %result2 = sub i64 64 , %result ; 64 - popcount; consumed by the inline asm (printed as v[5:6] in the expected output) and by %cmp
675+ call void asm "; use $0" , "s" (i64 %result2 )
676+ %cmp = icmp ne i64 %result2 , 0 ; (64 - popcount) != 0
677+ %zext = zext i1 %cmp to i32 ; %zext has no users in this function
678+ store i64 %result , ptr addrspace (1 ) %out ; the popcount itself, not %result2, is what gets stored
679+ ret void
680+ }
681+
682+ define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses (i32 inreg %val0 ) { ; i32 ctpop whose result feeds both an inline asm and the subtract; returns (32 - popcount) != 0 as i32
683+ ; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
684+ ; CHECK: ; %bb.0:
685+ ; CHECK-NEXT: s_bcnt1_i32_b32 s0, s0
686+ ; CHECK-NEXT: s_sub_i32 s1, 32, s0
687+ ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
688+ ; CHECK-NEXT: ;;#ASMSTART
689+ ; CHECK-NEXT: ; use s0
690+ ; CHECK-NEXT: ;;#ASMEND
691+ ; CHECK-NEXT: ;;#ASMSTART
692+ ; CHECK-NEXT: ; use s1
693+ ; CHECK-NEXT: ;;#ASMEND
694+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
695+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
696+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
697+ ; CHECK-NEXT: ; return to shader part epilog
698+ %result = call i32 @llvm.ctpop.i32 (i32 %val0 ) nounwind readnone ; population count of the inreg argument
699+ %result2 = sub i32 32 , %result ; 32 - popcount
700+ call void asm "; use $0" , "s" (i32 %result )
701+ call void asm "; use $0" , "s" (i32 %result2 )
702+ %cmp = icmp ne i32 %result2 , 0 ; (32 - popcount) != 0; the expected output keeps an explicit s_cmp_lg_u32 for this
703+ %zext = zext i1 %cmp to i32 ; widen the i1 to the i32 return value
704+ ret i32 %zext
705+ }
706+
707+ define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses (i64 inreg %val0 ) { ; i64 ctpop whose result feeds both an inline asm and the subtract; returns (64 - popcount) != 0 as i32
708+ ; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
709+ ; CHECK: ; %bb.0:
710+ ; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
711+ ; CHECK-NEXT: s_sub_u32 s2, 64, s0
712+ ; CHECK-NEXT: s_subb_u32 s3, 0, 0
713+ ; CHECK-NEXT: s_mov_b32 s1, 0
714+ ; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
715+ ; CHECK-NEXT: ;;#ASMSTART
716+ ; CHECK-NEXT: ; use s[0:1]
717+ ; CHECK-NEXT: ;;#ASMEND
718+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
719+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
720+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
721+ ; CHECK-NEXT: ;;#ASMSTART
722+ ; CHECK-NEXT: ; use s[2:3]
723+ ; CHECK-NEXT: ;;#ASMEND
724+ ; CHECK-NEXT: ; return to shader part epilog
725+ %result = call i64 @llvm.ctpop.i64 (i64 %val0 ) nounwind readnone ; population count of the 64-bit inreg argument
726+ %result2 = sub i64 64 , %result ; 64 - popcount
727+ call void asm "; use $0" , "s" (i64 %result )
728+ call void asm "; use $0" , "s" (i64 %result2 )
729+ %cmp = icmp ne i64 %result2 , 0 ; (64 - popcount) != 0; the expected output keeps an explicit s_cmp_lg_u64 for this
730+ %zext = zext i1 %cmp to i32 ; widen the i1 to the i32 return value
731+ ret i32 %zext
732+ }
0 commit comments