Skip to content

Commit b44c016

Browse files
committed
Change testcases
1 parent d4d3428 commit b44c016

File tree

1 file changed

+25
-39
lines changed

llvm/test/CodeGen/AMDGPU/s_bcnt0.ll

Lines changed: 25 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,47 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
 
-define amdgpu_ps void @bcnt032_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+define amdgpu_ps i32 @bcnt032_not_for_vregs(i64 %val) {
 ; CHECK-LABEL: bcnt032_not_for_vregs:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: s_lshl_b32 s0, s0, 2
-; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
-; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
-; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
+; CHECK-NEXT: v_bcnt_u32_b32 v0, v0, 0
+; CHECK-NEXT: v_sub_u32_e32 v0, 32, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
 ; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v3
+; CHECK-NEXT: ; use v0
 ; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: global_store_dword v[0:1], v2, off
-; CHECK-NEXT: s_endpgm
-%tid = call i32 @llvm.amdgcn.workitem.id.x()
-%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
-%val0 = load volatile i32, ptr addrspace(1) %gep
-%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: ; return to shader part epilog
+%val0 = trunc i64 %val to i32
+%result = call i32 @llvm.ctpop.i32(i32 %val0)
 %result2 = sub i32 32, %result
 call void asm "; use $0", "s"(i32 %result2)
 %cmp = icmp ne i32 %result2, 0
 %zext = zext i1 %cmp to i32
-store i32 %result, ptr addrspace(1) %out
-ret void
+ret i32 %zext
 }
 
-define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+define amdgpu_ps i32 @bcnt064_not_for_vregs(i64 %val0) {
 ; CHECK-LABEL: bcnt064_not_for_vregs:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: s_lshl_b32 s0, s0, 2
-; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
-; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v4, 0
-; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
-; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
-; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
-; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
+; CHECK-NEXT: v_bcnt_u32_b32 v0, v0, 0
+; CHECK-NEXT: v_bcnt_u32_b32 v0, v1, v0
+; CHECK-NEXT: v_sub_co_u32_e32 v0, vcc, 64, v0
+; CHECK-NEXT: v_subb_co_u32_e64 v1, s[0:1], 0, 0, vcc
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
 ; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[5:6]
+; CHECK-NEXT: ; use v[0:1]
 ; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
-; CHECK-NEXT: s_endpgm
-%tid = call i32 @llvm.amdgcn.workitem.id.x()
-%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
-%val0 = load volatile i64, ptr addrspace(1) %gep
-%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: ; return to shader part epilog
+%result = call i64 @llvm.ctpop.i64(i64 %val0)
 %result2 = sub i64 64, %result
 call void asm "; use $0", "s"(i64 %result2)
 %cmp = icmp ne i64 %result2, 0
 %zext = zext i1 %cmp to i32
-store i64 %result, ptr addrspace(1) %out
-ret void
+ret i32 %zext
 }
 
 define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
@@ -73,7 +59,7 @@ define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
 ; CHECK-NEXT: ; return to shader part epilog
-%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+%result = call i32 @llvm.ctpop.i32(i32 %val0)
 %result2 = sub i32 32, %result
 call void asm "; use $0", "s"(i32 %result)
 call void asm "; use $0", "s"(i32 %result2)
@@ -100,7 +86,7 @@ define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
 ; CHECK-NEXT: ; use s[2:3]
 ; CHECK-NEXT: ;;#ASMEND
 ; CHECK-NEXT: ; return to shader part epilog
-%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
+%result = call i64 @llvm.ctpop.i64(i64 %val0)
 %result2 = sub i64 64, %result
 call void asm "; use $0", "s"(i64 %result)
 call void asm "; use $0", "s"(i64 %result2)

0 commit comments

Comments
 (0)