11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
22; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
33
; Test case bcnt032_not_for_vregs (shown here as a diff: lines marked `-` are
; the removed version, lines marked `+` are the added version, unmarked lines
; are shared by both).
;
; Removed version: loads a volatile i32 from %in at the index given by
; llvm.amdgcn.workitem.id.x, takes its 32-bit population count, stores the
; count to %out and returns void. The shared %cmp/%zext values are not used by
; that store or return.
;
; Added version: takes an i64 argument, truncates it to i32, takes the 32-bit
; population count and returns the zero-extended result of comparing
; (32 - count) against 0, so %cmp/%zext feed the return value. The
; `nounwind readnone` call-site attributes on the ctpop call are dropped.
;
; In both versions (32 - count) is also passed to an inline asm "; use $0"
; statement with an "s" constraint.
;
; The added CHECK lines expect v_bcnt_u32_b32 / v_sub_u32_e32 /
; v_cmp_ne_u32_e32, followed by v_cndmask_b32_e64 and v_readfirstlane_b32 to
; produce the value returned in s0.
; NOTE(review): judging by the name, this presumably guards against applying a
; scalar-only bcnt/compare fold when the value lives in VGPRs - confirm against
; the change that introduced the test.
4- define amdgpu_ps void @bcnt032_not_for_vregs (ptr addrspace ( 1 ) %out , ptr addrspace ( 1 ) %in ) {
4+ define amdgpu_ps i32 @bcnt032_not_for_vregs (i64 %val ) {
55; CHECK-LABEL: bcnt032_not_for_vregs:
66; CHECK: ; %bb.0:
7- ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
8- ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
9- ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
10- ; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
11- ; CHECK-NEXT: s_waitcnt vmcnt(0)
12- ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
13- ; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
7+ ; CHECK-NEXT: v_bcnt_u32_b32 v0, v0, 0
8+ ; CHECK-NEXT: v_sub_u32_e32 v0, 32, v0
9+ ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1410; CHECK-NEXT: ;;#ASMSTART
15- ; CHECK-NEXT: ; use v3
11+ ; CHECK-NEXT: ; use v0
1612; CHECK-NEXT: ;;#ASMEND
17- ; CHECK-NEXT: global_store_dword v[0:1], v2, off
18- ; CHECK-NEXT: s_endpgm
19- %tid = call i32 @llvm.amdgcn.workitem.id.x ()
20- %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i32 %tid
21- %val0 = load volatile i32 , ptr addrspace (1 ) %gep
22- %result = call i32 @llvm.ctpop.i32 (i32 %val0 ) nounwind readnone
13+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
14+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
15+ ; CHECK-NEXT: ; return to shader part epilog
16+ %val0 = trunc i64 %val to i32
17+ %result = call i32 @llvm.ctpop.i32 (i32 %val0 )
2318 %result2 = sub i32 32 , %result
2419 call void asm "; use $0" , "s" (i32 %result2 )
2520 %cmp = icmp ne i32 %result2 , 0
2621 %zext = zext i1 %cmp to i32
27- store i32 %result , ptr addrspace (1 ) %out
28- ret void
22+ ret i32 %zext
2923}
3024
; Test case bcnt064_not_for_vregs (shown here as a diff: lines marked `-` are
; the removed version, lines marked `+` are the added version, unmarked lines
; are shared by both).
;
; Removed version: loads a volatile i64 from %in at the index given by
; llvm.amdgcn.workitem.id.x, takes its 64-bit population count, stores the
; count to %out and returns void. The shared %cmp/%zext values are not used by
; that store or return.
;
; Added version: takes the i64 value directly as the argument %val0, takes its
; 64-bit population count and returns the zero-extended result of comparing
; (64 - count) against 0, so %cmp/%zext feed the return value. The
; `nounwind readnone` call-site attributes on the ctpop call are dropped.
;
; In both versions (64 - count) is also passed to an inline asm "; use $0"
; statement with an "s" constraint.
;
; The added CHECK lines expect two v_bcnt_u32_b32 instructions (the second
; takes the first one's result as its accumulator operand), a
; v_sub_co_u32_e32 / v_subb_co_u32_e64 pair for the 64-bit subtraction, and
; v_cmp_ne_u64_e32, followed by v_cndmask_b32_e64 and v_readfirstlane_b32 to
; produce the value returned in s0.
; NOTE(review): judging by the name, this presumably guards against applying a
; scalar-only bcnt/compare fold when the value lives in VGPRs - confirm against
; the change that introduced the test.
31- define amdgpu_ps void @bcnt064_not_for_vregs (ptr addrspace ( 1 ) %out , ptr addrspace ( 1 ) %in ) {
25+ define amdgpu_ps i32 @bcnt064_not_for_vregs (i64 %val0 ) {
3226; CHECK-LABEL: bcnt064_not_for_vregs:
3327; CHECK: ; %bb.0:
34- ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
35- ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
36- ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
37- ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
38- ; CHECK-NEXT: s_waitcnt vmcnt(0)
39- ; CHECK-NEXT: v_mov_b32_e32 v4, 0
40- ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
41- ; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
42- ; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
43- ; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
28+ ; CHECK-NEXT: v_bcnt_u32_b32 v0, v0, 0
29+ ; CHECK-NEXT: v_bcnt_u32_b32 v0, v1, v0
30+ ; CHECK-NEXT: v_sub_co_u32_e32 v0, vcc, 64, v0
31+ ; CHECK-NEXT: v_subb_co_u32_e64 v1, s[0:1], 0, 0, vcc
32+ ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
4433; CHECK-NEXT: ;;#ASMSTART
45- ; CHECK-NEXT: ; use v[5:6 ]
34+ ; CHECK-NEXT: ; use v[0:1 ]
4635; CHECK-NEXT: ;;#ASMEND
47- ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
48- ; CHECK-NEXT: s_endpgm
49- %tid = call i32 @llvm.amdgcn.workitem.id.x ()
50- %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i32 %tid
51- %val0 = load volatile i64 , ptr addrspace (1 ) %gep
52- %result = call i64 @llvm.ctpop.i64 (i64 %val0 ) nounwind readnone
36+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
37+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
38+ ; CHECK-NEXT: ; return to shader part epilog
39+ %result = call i64 @llvm.ctpop.i64 (i64 %val0 )
5340 %result2 = sub i64 64 , %result
5441 call void asm "; use $0" , "s" (i64 %result2 )
5542 %cmp = icmp ne i64 %result2 , 0
5643 %zext = zext i1 %cmp to i32
57- store i64 %result , ptr addrspace (1 ) %out
58- ret void
44+ ret i32 %zext
5945}
6046
6147define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses (i32 inreg %val0 ) {
@@ -73,7 +59,7 @@ define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
7359; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
7460; CHECK-NEXT: v_readfirstlane_b32 s0, v0
7561; CHECK-NEXT: ; return to shader part epilog
76- %result = call i32 @llvm.ctpop.i32 (i32 %val0 ) nounwind readnone
62+ %result = call i32 @llvm.ctpop.i32 (i32 %val0 )
7763 %result2 = sub i32 32 , %result
7864 call void asm "; use $0" , "s" (i32 %result )
7965 call void asm "; use $0" , "s" (i32 %result2 )
@@ -100,7 +86,7 @@ define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
10086; CHECK-NEXT: ; use s[2:3]
10187; CHECK-NEXT: ;;#ASMEND
10288; CHECK-NEXT: ; return to shader part epilog
103- %result = call i64 @llvm.ctpop.i64 (i64 %val0 ) nounwind readnone
89+ %result = call i64 @llvm.ctpop.i64 (i64 %val0 )
10490 %result2 = sub i64 64 , %result
10591 call void asm "; use $0" , "s" (i64 %result )
10692 call void asm "; use $0" , "s" (i64 %result2 )
0 commit comments