
Commit cd538a6

[AMDGPU] Precommit fused-bitlogic.ll test. NFC.

1 parent: 9eb8040

File tree: 1 file changed, +349 -0 lines

@@ -0,0 +1,349 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s

define amdgpu_kernel void @divergent_or3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-LABEL: divergent_or3_b32:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v3, 4, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_dwordx3 v[0:2], v3, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v0, v1, v0
; GCN-NEXT:    v_or_b32_e32 v0, v0, v2
; GCN-NEXT:    v_not_b32_e32 v0, v0
; GCN-NEXT:    global_store_dword v3, v0, s[0:1]
; GCN-NEXT:    s_endpgm
bb:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i1 = zext i32 %i to i64
  %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
  %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
  %i4 = extractelement <3 x i32> %i3, i64 0
  %i5 = extractelement <3 x i32> %i3, i64 1
  %i6 = extractelement <3 x i32> %i3, i64 2
  %i7 = or i32 %i5, %i4
  %i8 = or i32 %i7, %i6
  %i9 = xor i32 %i8, -1
  %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
  store i32 %i9, i32 addrspace(1)* %i10, align 16
  ret void
}

define amdgpu_kernel void @divergent_or3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-LABEL: divergent_or3_b64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v6, 5, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_dwordx2 v[4:5], v6, s[0:1] offset:16
; GCN-NEXT:    global_load_dwordx4 v[0:3], v6, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v1, v3, v1
; GCN-NEXT:    v_or_b32_e32 v0, v2, v0
; GCN-NEXT:    v_or_b32_e32 v0, v0, v4
; GCN-NEXT:    v_or_b32_e32 v1, v1, v5
; GCN-NEXT:    v_not_b32_e32 v0, v0
; GCN-NEXT:    v_not_b32_e32 v1, v1
; GCN-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1]
; GCN-NEXT:    s_endpgm
bb:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i1 = zext i32 %i to i64
  %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
  %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = extractelement <3 x i64> %i3, i64 2
  %i7 = or i64 %i5, %i4
  %i8 = or i64 %i7, %i6
  %i9 = xor i64 %i8, -1
  %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
  store i64 %i9, i64 addrspace(1)* %i10, align 32
  ret void
}

define amdgpu_kernel void @divergent_and3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-LABEL: divergent_and3_b32:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v3, 4, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_dwordx3 v[0:2], v3, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v1, v0
; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
; GCN-NEXT:    v_not_b32_e32 v0, v0
; GCN-NEXT:    global_store_dword v3, v0, s[0:1]
; GCN-NEXT:    s_endpgm
bb:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i1 = zext i32 %i to i64
  %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
  %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
  %i4 = extractelement <3 x i32> %i3, i64 0
  %i5 = extractelement <3 x i32> %i3, i64 1
  %i6 = extractelement <3 x i32> %i3, i64 2
  %i7 = and i32 %i5, %i4
  %i8 = and i32 %i7, %i6
  %i9 = xor i32 %i8, -1
  %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
  store i32 %i9, i32 addrspace(1)* %i10, align 16
  ret void
}

define amdgpu_kernel void @divergent_and3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-LABEL: divergent_and3_b64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v6, 5, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_dwordx2 v[4:5], v6, s[0:1] offset:16
; GCN-NEXT:    global_load_dwordx4 v[0:3], v6, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, v3, v1
; GCN-NEXT:    v_and_b32_e32 v0, v2, v0
; GCN-NEXT:    v_and_b32_e32 v0, v0, v4
; GCN-NEXT:    v_and_b32_e32 v1, v1, v5
; GCN-NEXT:    v_not_b32_e32 v0, v0
; GCN-NEXT:    v_not_b32_e32 v1, v1
; GCN-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1]
; GCN-NEXT:    s_endpgm
bb:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i1 = zext i32 %i to i64
  %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
  %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = extractelement <3 x i64> %i3, i64 2
  %i7 = and i64 %i5, %i4
  %i8 = and i64 %i7, %i6
  %i9 = xor i64 %i8, -1
  %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
  store i64 %i9, i64 addrspace(1)* %i10, align 32
  ret void
}

define amdgpu_kernel void @divergent_xor3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-LABEL: divergent_xor3_b32:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v3, 4, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_dwordx3 v[0:2], v3, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_xor_b32_e32 v0, v1, v0
; GCN-NEXT:    v_xnor_b32_e32 v0, v0, v2
; GCN-NEXT:    global_store_dword v3, v0, s[0:1]
; GCN-NEXT:    s_endpgm
bb:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i1 = zext i32 %i to i64
  %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
  %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
  %i4 = extractelement <3 x i32> %i3, i64 0
  %i5 = extractelement <3 x i32> %i3, i64 1
  %i6 = extractelement <3 x i32> %i3, i64 2
  %i7 = xor i32 %i5, %i4
  %i8 = xor i32 %i7, %i6
  %i9 = xor i32 %i8, -1
  %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
  store i32 %i9, i32 addrspace(1)* %i10, align 16
  ret void
}

define amdgpu_kernel void @divergent_xor3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-LABEL: divergent_xor3_b64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v6, 5, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_dwordx2 v[4:5], v6, s[0:1] offset:16
; GCN-NEXT:    global_load_dwordx4 v[0:3], v6, s[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_xor_b32_e32 v1, v3, v1
; GCN-NEXT:    v_xor_b32_e32 v0, v2, v0
; GCN-NEXT:    v_xnor_b32_e32 v0, v0, v4
; GCN-NEXT:    v_xnor_b32_e32 v1, v1, v5
; GCN-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1]
; GCN-NEXT:    s_endpgm
bb:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i1 = zext i32 %i to i64
  %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
  %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = extractelement <3 x i64> %i3, i64 2
  %i7 = xor i64 %i5, %i4
  %i8 = xor i64 %i7, %i6
  %i9 = xor i64 %i8, -1
  %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
  store i64 %i9, i64 addrspace(1)* %i10, align 32
  ret void
}

define amdgpu_kernel void @uniform_or3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-LABEL: uniform_or3_b32:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_or_b32 s0, s1, s0
; GCN-NEXT:    s_nor_b32 s0, s0, s2
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    global_store_dword v0, v1, s[4:5]
; GCN-NEXT:    s_endpgm
bb:
  %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
  %i4 = extractelement <3 x i32> %i3, i64 0
  %i5 = extractelement <3 x i32> %i3, i64 1
  %i6 = extractelement <3 x i32> %i3, i64 2
  %i7 = or i32 %i5, %i4
  %i8 = or i32 %i7, %i6
  %i9 = xor i32 %i8, -1
  %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
  store i32 %i9, i32 addrspace(1)* %i10, align 16
  ret void
}

define amdgpu_kernel void @uniform_or3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-LABEL: uniform_or3_b64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT:    s_nor_b64 s[0:1], s[0:1], s[6:7]
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT:    s_endpgm
bb:
  %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = extractelement <3 x i64> %i3, i64 2
  %i7 = or i64 %i5, %i4
  %i8 = or i64 %i7, %i6
  %i9 = xor i64 %i8, -1
  %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
  store i64 %i9, i64 addrspace(1)* %i10, align 32
  ret void
}

define amdgpu_kernel void @uniform_and3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-LABEL: uniform_and3_b32:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_and_b32 s0, s1, s0
; GCN-NEXT:    s_nand_b32 s0, s0, s2
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    global_store_dword v0, v1, s[4:5]
; GCN-NEXT:    s_endpgm
bb:
  %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
  %i4 = extractelement <3 x i32> %i3, i64 0
  %i5 = extractelement <3 x i32> %i3, i64 1
  %i6 = extractelement <3 x i32> %i3, i64 2
  %i7 = and i32 %i5, %i4
  %i8 = and i32 %i7, %i6
  %i9 = xor i32 %i8, -1
  %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
  store i32 %i9, i32 addrspace(1)* %i10, align 16
  ret void
}

define amdgpu_kernel void @uniform_and3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-LABEL: uniform_and3_b64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT:    s_nand_b64 s[0:1], s[0:1], s[6:7]
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT:    s_endpgm
bb:
  %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = extractelement <3 x i64> %i3, i64 2
  %i7 = and i64 %i5, %i4
  %i8 = and i64 %i7, %i6
  %i9 = xor i64 %i8, -1
  %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
  store i64 %i9, i64 addrspace(1)* %i10, align 32
  ret void
}

define amdgpu_kernel void @uniform_xor3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-LABEL: uniform_xor3_b32:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_xor_b32 s0, s1, s0
; GCN-NEXT:    s_xnor_b32 s0, s0, s2
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    global_store_dword v0, v1, s[4:5]
; GCN-NEXT:    s_endpgm
bb:
  %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
  %i4 = extractelement <3 x i32> %i3, i64 0
  %i5 = extractelement <3 x i32> %i3, i64 1
  %i6 = extractelement <3 x i32> %i3, i64 2
  %i7 = xor i32 %i5, %i4
  %i8 = xor i32 %i7, %i6
  %i9 = xor i32 %i8, -1
  %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
  store i32 %i9, i32 addrspace(1)* %i10, align 16
  ret void
}

define amdgpu_kernel void @uniform_xor3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-LABEL: uniform_xor3_b64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT:    s_xnor_b64 s[0:1], s[0:1], s[6:7]
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT:    s_endpgm
bb:
  %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = extractelement <3 x i64> %i3, i64 2
  %i7 = xor i64 %i5, %i4
  %i8 = xor i64 %i7, %i6
  %i9 = xor i64 %i8, -1
  %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
  store i64 %i9, i64 addrspace(1)* %i10, align 32
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()
