; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
3+
; 32-bit negative case: the value fed to ctpop comes from a volatile global
; load, and the expected assembly below uses the VALU forms (v_bcnt_u32_b32
; followed by v_sub_u32) for both the population count and `32 - ctpop`;
; no s_bcnt0 instruction is expected.
define amdgpu_ps void @bcnt032_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CHECK-LABEL: bcnt032_not_for_vregs:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; CHECK-NEXT:    global_load_dword v2, v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_bcnt_u32_b32 v2, v2, 0
; CHECK-NEXT:    v_sub_u32_e32 v3, 32, v2
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v3
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    global_store_dword v[0:1], v2, off
; CHECK-NEXT:    s_endpgm
  ; Address %in[workitem.id.x] and read it with a volatile load.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %lane
  %bits = load volatile i32, ptr addrspace(1) %src
  ; Count of set bits, then the count of clear bits as 32 - ctpop.
  %ones = call i32 @llvm.ctpop.i32(i32 %bits) nounwind readnone
  %zeros = sub i32 32, %ones
  ; Keep the subtraction result alive through an inline-asm use.
  call void asm "; use $0", "s"(i32 %zeros)
  ; NOTE(review): %nonzero and %ext have no users in this function. They are
  ; kept because the autogenerated assertions were produced with them present.
  %nonzero = icmp ne i32 %zeros, 0
  %ext = zext i1 %nonzero to i32
  ; The stored value is the ctpop result itself, not the subtraction.
  store i32 %ones, ptr addrspace(1) %out
  ret void
}
30+
; 64-bit negative case: the value fed to ctpop comes from a volatile global
; load, and the expected assembly uses two chained v_bcnt_u32_b32 instructions
; plus a VALU 64-bit subtract for `64 - ctpop`; no s_bcnt0 instruction is
; expected.
;
; NOTE(review): the expected-assembly lines in this function had each lost
; their first seven characters (for example "s_lshl_b32" read "b32"). They are
; restored here from the parallel 32-bit test and the IR below; re-run
; utils/update_llc_test_checks.py to confirm them against real llc output.
define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CHECK-LABEL: bcnt064_not_for_vregs:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v4, 0
; CHECK-NEXT:    v_bcnt_u32_b32 v2, v2, 0
; CHECK-NEXT:    v_bcnt_u32_b32 v3, v3, v2
; CHECK-NEXT:    v_sub_co_u32_e32 v5, vcc, 64, v3
; CHECK-NEXT:    v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[5:6]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[3:4], off
; CHECK-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): the GEP steps by i32 elements although an i64 is loaded; the
  ; shift-by-2 in the expected assembly matches that. Confirm it is intended.
  %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
  %val0 = load volatile i64, ptr addrspace(1) %gep
  ; Count of set bits, then the count of clear bits as 64 - ctpop.
  %result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
  %result2 = sub i64 64, %result
  ; Keep the subtraction result alive through an inline-asm use.
  call void asm "; use $0", "s"(i64 %result2)
  ; NOTE(review): %cmp and %zext have no users in this function. They are kept
  ; so the instruction sequence matches what the assertions were generated from.
  %cmp = icmp ne i64 %result2, 0
  %zext = zext i1 %cmp to i32
  ; The stored value is the ctpop result itself, not the subtraction.
  store i64 %result, ptr addrspace(1) %out
  ret void
}
60+
; 32-bit case where the ctpop result has a second user: it is consumed by an
; inline asm as well as by the `32 - ctpop` subtraction. The expected assembly
; contains both s_bcnt1_i32_b32 (for the ctpop) and s_bcnt0_i32_b32 (for the
; subtraction), and no separate s_cmp precedes the s_cselect_b64.
define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_bcnt1_i32_b32 s1, s0
; CHECK-NEXT:    s_bcnt0_i32_b32 s0, s0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use s1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use s0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    ; return to shader part epilog
  ; Count of set bits, then the count of clear bits as 32 - ctpop.
  %ones = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %zeros = sub i32 32, %ones
  ; Two inline-asm uses: first the ctpop result, then the subtraction result.
  call void asm "; use $0", "s"(i32 %ones)
  call void asm "; use $0", "s"(i32 %zeros)
  ; Return 1 when the subtraction result is non-zero, otherwise 0.
  %nonzero = icmp ne i32 %zeros, 0
  %ext = zext i1 %nonzero to i32
  ret i32 %ext
}
84+
; 64-bit case where the ctpop result has a second user: it is consumed by an
; inline asm as well as by the `64 - ctpop` subtraction. The expected assembly
; contains both s_bcnt1_i32_b64 and s_bcnt0_i32_b64, with an explicit
; s_cmp_lg_u64 feeding the s_cselect_b64.
define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_mov_b32 s3, 0
; CHECK-NEXT:    s_bcnt1_i32_b64 s2, s[0:1]
; CHECK-NEXT:    s_bcnt0_i32_b64 s0, s[0:1]
; CHECK-NEXT:    s_mov_b32 s1, s3
; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use s[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use s[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ; return to shader part epilog
  ; Count of set bits, then the count of clear bits as 64 - ctpop.
  %ones = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
  %zeros = sub i64 64, %ones
  ; Two inline-asm uses: first the ctpop result, then the subtraction result.
  call void asm "; use $0", "s"(i64 %ones)
  call void asm "; use $0", "s"(i64 %zeros)
  ; Return 1 when the subtraction result is non-zero, otherwise 0.
  %nonzero = icmp ne i64 %zeros, 0
  %ext = zext i1 %nonzero to i32
  ret i32 %ext
}
0 commit comments