33
44define amdgpu_ps void @bcnt032_not_for_vregs (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
55; CHECK-LABEL: bcnt032_not_for_vregs:
6- ; CHECK: ; %bb.0:
7- ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
8- ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
9- ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
10- ; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
11- ; CHECK-NEXT: s_waitcnt vmcnt(0)
12- ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
13- ; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
14- ; CHECK-NEXT: ;;#ASMSTART
15- ; CHECK-NEXT: ; use v3
16- ; CHECK-NEXT: ;;#ASMEND
17- ; CHECK-NEXT: global_store_dword v[0:1], v2, off
18- ; CHECK-NEXT: s_endpgm
6+ ; CHECK: ; %bb.0:
7+ ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
8+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
9+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
10+ ; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
11+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
12+ ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
13+ ; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
14+ ; CHECK-NEXT: ;;#ASMSTART
15+ ; CHECK-NEXT: ; use v3
16+ ; CHECK-NEXT: ;;#ASMEND
17+ ; CHECK-NEXT: global_store_dword v[0:1], v2, off
18+ ; CHECK-NEXT: s_endpgm
1919 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
2020 %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i32 %tid
2121 %val0 = load volatile i32 , ptr addrspace (1 ) %gep
@@ -30,22 +30,22 @@ define amdgpu_ps void @bcnt032_not_for_vregs(ptr addrspace(1) %out, ptr addrspac
3030
3131define amdgpu_ps void @bcnt064_not_for_vregs (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
3232; CHECK-LABEL: bcnt064_not_for_vregs:
33- ; CHECK: ; %bb.0:
34- ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
35- ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
36- ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
37- ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
38- ; CHECK-NEXT: s_waitcnt vmcnt(0)
39- ; CHECK-NEXT: v_mov_b32_e32 v4, 0
40- ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
41- ; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
42- ; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
43- ; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
44- ; CHECK-NEXT: ;;#ASMSTART
45- ; CHECK-NEXT: ; use v[5:6]
46- ; CHECK-NEXT: ;;#ASMEND
47- ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
48- ; CHECK-NEXT: s_endpgm
33+ ; CHECK: ; %bb.0:
34+ ; CHECK-NEXT: s_lshl_b32 s0, s0, 2
35+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
36+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
37+ ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
38+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
39+ ; CHECK-NEXT: v_mov_b32_e32 v4, 0
40+ ; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
41+ ; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
42+ ; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
43+ ; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
44+ ; CHECK-NEXT: ;;#ASMSTART
45+ ; CHECK-NEXT: ; use v[5:6]
46+ ; CHECK-NEXT: ;;#ASMEND
47+ ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
48+ ; CHECK-NEXT: s_endpgm
4949 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
5050 %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i32 %tid
5151 %val0 = load volatile i64 , ptr addrspace (1 ) %gep
@@ -61,18 +61,18 @@ define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspac
6161define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses (i32 inreg %val0 ) {
6262; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
6363; CHECK: ; %bb.0:
64- ; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
65- ; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
66- ; CHECK-NEXT: ;;#ASMSTART
67- ; CHECK-NEXT: ; use s1
68- ; CHECK-NEXT: ;;#ASMEND
69- ; CHECK-NEXT: ;;#ASMSTART
70- ; CHECK-NEXT: ; use s0
71- ; CHECK-NEXT: ;;#ASMEND
72- ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
73- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
74- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
75- ; CHECK-NEXT: ; return to shader part epilog
64+ ; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
65+ ; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
66+ ; CHECK-NEXT: ;;#ASMSTART
67+ ; CHECK-NEXT: ; use s1
68+ ; CHECK-NEXT: ;;#ASMEND
69+ ; CHECK-NEXT: ;;#ASMSTART
70+ ; CHECK-NEXT: ; use s0
71+ ; CHECK-NEXT: ;;#ASMEND
72+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
73+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
74+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
75+ ; CHECK-NEXT: ; return to shader part epilog
7676 %result = call i32 @llvm.ctpop.i32 (i32 %val0 ) nounwind readnone
7777 %result2 = sub i32 32 , %result
7878 call void asm "; use $0" , "s" (i32 %result )
@@ -85,26 +85,26 @@ define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
8585define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses (i64 inreg %val0 ) {
8686; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
8787; CHECK: ; %bb.0:
88- ; CHECK-NEXT: s_mov_b32 s3, 0
89- ; CHECK-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
90- ; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
91- ; CHECK-NEXT: s_mov_b32 s1, s3
92- ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
93- ; CHECK-NEXT: ;;#ASMSTART
94- ; CHECK-NEXT: ; use s[0:1]
95- ; CHECK-NEXT: ;;#ASMEND
96- ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
97- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
98- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
99- ; CHECK-NEXT: ;;#ASMSTART
100- ; CHECK-NEXT: ; use s[2:3]
101- ; CHECK-NEXT: ;;#ASMEND
102- ; CHECK-NEXT: ; return to shader part epilog
88+ ; CHECK-NEXT: s_mov_b32 s3, 0
89+ ; CHECK-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
90+ ; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
91+ ; CHECK-NEXT: s_mov_b32 s1, s3
92+ ; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
93+ ; CHECK-NEXT: ;;#ASMSTART
94+ ; CHECK-NEXT: ; use s[0:1]
95+ ; CHECK-NEXT: ;;#ASMEND
96+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
97+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
98+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
99+ ; CHECK-NEXT: ;;#ASMSTART
100+ ; CHECK-NEXT: ; use s[2:3]
101+ ; CHECK-NEXT: ;;#ASMEND
102+ ; CHECK-NEXT: ; return to shader part epilog
103103 %result = call i64 @llvm.ctpop.i64 (i64 %val0 ) nounwind readnone
104104 %result2 = sub i64 64 , %result
105105 call void asm "; use $0" , "s" (i64 %result )
106106 call void asm "; use $0" , "s" (i64 %result2 )
107107 %cmp = icmp ne i64 %result2 , 0
108108 %zext = zext i1 %cmp to i32
109109 ret i32 %zext
110- }
110+ }
0 commit comments