Skip to content

Commit 47c7afd

Browse files
committed
Precommit autogen checks
Change-Id: I72dcb47e39e5d78a0d770ddcd0fa1023b30a0eab
1 parent 3aaa58f commit 47c7afd

File tree

1 file changed: 97 additions and 29 deletions.

llvm/test/CodeGen/AMDGPU/alignbit-pat.ll

Lines changed: 97 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
12
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
23

3-
; GCN-LABEL: {{^}}alignbit_shr_pat:
4-
; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]
5-
; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
6-
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
7-
84
define amdgpu_kernel void @alignbit_shr_pat(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
5+
; GCN-LABEL: alignbit_shr_pat:
6+
; GCN: ; %bb.0: ; %bb
7+
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8+
; GCN-NEXT: s_load_dword s8, s[4:5], 0xd
9+
; GCN-NEXT: s_mov_b32 s7, 0xf000
10+
; GCN-NEXT: s_mov_b32 s6, -1
11+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
12+
; GCN-NEXT: s_mov_b32 s4, s0
13+
; GCN-NEXT: s_mov_b32 s5, s1
14+
; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
15+
; GCN-NEXT: s_mov_b32 s4, s2
16+
; GCN-NEXT: s_mov_b32 s5, s3
17+
; GCN-NEXT: s_waitcnt vmcnt(0)
18+
; GCN-NEXT: v_alignbit_b32 v0, v1, v0, s8
19+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
20+
; GCN-NEXT: s_endpgm
921
bb:
1022
%tmp = load i64, ptr addrspace(1) %arg, align 8
1123
%tmp3 = and i32 %arg2, 31
@@ -16,12 +28,24 @@ bb:
1628
ret void
1729
}
1830

19-
; GCN-LABEL: {{^}}alignbit_shr_pat_v:
20-
; GCN-DAG: load_dword v[[SHR:[0-9]+]],
21-
; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
22-
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]
23-
2431
define amdgpu_kernel void @alignbit_shr_pat_v(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
32+
; GCN-LABEL: alignbit_shr_pat_v:
33+
; GCN: ; %bb.0: ; %bb
34+
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
35+
; GCN-NEXT: s_mov_b32 s7, 0xf000
36+
; GCN-NEXT: s_mov_b32 s6, 0
37+
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
38+
; GCN-NEXT: v_mov_b32_e32 v2, 0
39+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
40+
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
41+
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
42+
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
43+
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
44+
; GCN-NEXT: buffer_load_dword v0, v[1:2], s[4:7], 0 addr64
45+
; GCN-NEXT: s_waitcnt vmcnt(0)
46+
; GCN-NEXT: v_alignbit_b32 v0, v4, v3, v0
47+
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
48+
; GCN-NEXT: s_endpgm
2549
bb:
2650
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
2751
%gep1 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid
@@ -36,12 +60,24 @@ bb:
3660
ret void
3761
}
3862

39-
; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and30:
40-
; Negative test, wrong constant
41-
; GCN: v_lshr_b64
42-
; GCN-NOT: v_alignbit_b32
43-
4463
define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
64+
; GCN-LABEL: alignbit_shr_pat_wrong_and30:
65+
; GCN: ; %bb.0: ; %bb
66+
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
67+
; GCN-NEXT: s_load_dword s8, s[4:5], 0xd
68+
; GCN-NEXT: s_mov_b32 s7, 0xf000
69+
; GCN-NEXT: s_mov_b32 s6, -1
70+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
71+
; GCN-NEXT: s_mov_b32 s4, s0
72+
; GCN-NEXT: s_mov_b32 s5, s1
73+
; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
74+
; GCN-NEXT: s_mov_b32 s4, s2
75+
; GCN-NEXT: s_mov_b32 s5, s3
76+
; GCN-NEXT: s_and_b32 s0, s8, 30
77+
; GCN-NEXT: s_waitcnt vmcnt(0)
78+
; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], s0
79+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
80+
; GCN-NEXT: s_endpgm
4581
bb:
4682
%tmp = load i64, ptr addrspace(1) %arg, align 8
4783
%tmp3 = and i32 %arg2, 30
@@ -52,12 +88,23 @@ bb:
5288
ret void
5389
}
5490

55-
; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and63:
56-
; Negative test, wrong constant
57-
; GCN: v_lshr_b64
58-
; GCN-NOT: v_alignbit_b32
59-
6091
define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
92+
; GCN-LABEL: alignbit_shr_pat_wrong_and63:
93+
; GCN: ; %bb.0: ; %bb
94+
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
95+
; GCN-NEXT: s_load_dword s8, s[4:5], 0xd
96+
; GCN-NEXT: s_mov_b32 s7, 0xf000
97+
; GCN-NEXT: s_mov_b32 s6, -1
98+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
99+
; GCN-NEXT: s_mov_b32 s4, s0
100+
; GCN-NEXT: s_mov_b32 s5, s1
101+
; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
102+
; GCN-NEXT: s_mov_b32 s4, s2
103+
; GCN-NEXT: s_mov_b32 s5, s3
104+
; GCN-NEXT: s_waitcnt vmcnt(0)
105+
; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], s8
106+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
107+
; GCN-NEXT: s_endpgm
61108
bb:
62109
%tmp = load i64, ptr addrspace(1) %arg, align 8
63110
%tmp3 = and i32 %arg2, 63
@@ -68,11 +115,22 @@ bb:
68115
ret void
69116
}
70117

71-
; GCN-LABEL: {{^}}alignbit_shr_pat_const30:
72-
; GCN: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
73-
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30
74-
75118
define amdgpu_kernel void @alignbit_shr_pat_const30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
119+
; GCN-LABEL: alignbit_shr_pat_const30:
120+
; GCN: ; %bb.0: ; %bb
121+
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
122+
; GCN-NEXT: s_mov_b32 s7, 0xf000
123+
; GCN-NEXT: s_mov_b32 s6, -1
124+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
125+
; GCN-NEXT: s_mov_b32 s4, s0
126+
; GCN-NEXT: s_mov_b32 s5, s1
127+
; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
128+
; GCN-NEXT: s_mov_b32 s4, s2
129+
; GCN-NEXT: s_mov_b32 s5, s3
130+
; GCN-NEXT: s_waitcnt vmcnt(0)
131+
; GCN-NEXT: v_alignbit_b32 v0, v1, v0, 30
132+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
133+
; GCN-NEXT: s_endpgm
76134
bb:
77135
%tmp = load i64, ptr addrspace(1) %arg, align 8
78136
%tmp5 = lshr i64 %tmp, 30
@@ -81,12 +139,22 @@ bb:
81139
ret void
82140
}
83141

84-
; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_const33:
85-
; Negative test, shift amount more than 31
86-
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
87-
; GCN-NOT: v_alignbit_b32
88-
89142
define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
143+
; GCN-LABEL: alignbit_shr_pat_wrong_const33:
144+
; GCN: ; %bb.0: ; %bb
145+
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
146+
; GCN-NEXT: s_mov_b32 s7, 0xf000
147+
; GCN-NEXT: s_mov_b32 s6, -1
148+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
149+
; GCN-NEXT: s_mov_b32 s4, s2
150+
; GCN-NEXT: s_mov_b32 s5, s3
151+
; GCN-NEXT: s_mov_b32 s2, s6
152+
; GCN-NEXT: s_mov_b32 s3, s7
153+
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
154+
; GCN-NEXT: s_waitcnt vmcnt(0)
155+
; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v0
156+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
157+
; GCN-NEXT: s_endpgm
90158
bb:
91159
%tmp = load i64, ptr addrspace(1) %arg, align 8
92160
%tmp5 = lshr i64 %tmp, 33

0 commit comments

Comments
 (0)