Skip to content

Commit b9b9add

Browse files
committed
[AArch64] Add bitcast + extend tests. NFC
1 parent 2e54b4f commit b9b9add

File tree

1 file changed

+273
-0
lines changed

1 file changed

+273
-0
lines changed
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; Zero-extend of a bitcast scalar, narrowest case: the i32 argument is
; reinterpreted as <4 x i8> and every lane is widened to i16 with zext.
; The SelectionDAG expectation duplicates each byte into a halfword with zip1
; and then clears the upper byte of every halfword with bic. The GlobalISel
; expectation moves the four bytes out one at a time, rebuilds the byte vector
; and widens it with ushll.
define <4 x i16> @z_i32_v4i16(i32 %x) {
6+
; CHECK-SD-LABEL: z_i32_v4i16:
7+
; CHECK-SD: // %bb.0:
8+
; CHECK-SD-NEXT: fmov s0, w0
9+
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
10+
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
11+
; CHECK-SD-NEXT: ret
12+
;
13+
; CHECK-GI-LABEL: z_i32_v4i16:
14+
; CHECK-GI: // %bb.0:
15+
; CHECK-GI-NEXT: fmov s0, w0
16+
; CHECK-GI-NEXT: mov b1, v0.b[1]
17+
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
18+
; CHECK-GI-NEXT: mov b3, v0.b[2]
19+
; CHECK-GI-NEXT: mov b0, v0.b[3]
20+
; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
21+
; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
22+
; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
23+
; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0
24+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
25+
; CHECK-GI-NEXT: ret
26+
%b = bitcast i32 %x to <4 x i8>
27+
%e = zext <4 x i8> %b to <4 x i16>
28+
ret <4 x i16> %e
29+
}
30+
31+
; Zero-extend of a bitcast scalar to 32-bit lanes: the i32 argument is
; reinterpreted as <4 x i8> and every lane is widened to i32 with zext.
; The SelectionDAG expectation spreads the bytes with zip1, widens with ushll
; and masks each 32-bit lane down to its low byte using a movi constant.
; The GlobalISel expectation extracts the bytes individually, zero-extends
; them in general-purpose registers with uxtb and rebuilds the vector from two
; halves.
define <4 x i32> @z_i32_v4i32(i32 %x) {
32+
; CHECK-SD-LABEL: z_i32_v4i32:
33+
; CHECK-SD: // %bb.0:
34+
; CHECK-SD-NEXT: fmov s0, w0
35+
; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff
36+
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
37+
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
38+
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
39+
; CHECK-SD-NEXT: ret
40+
;
41+
; CHECK-GI-LABEL: z_i32_v4i32:
42+
; CHECK-GI: // %bb.0:
43+
; CHECK-GI-NEXT: fmov s0, w0
44+
; CHECK-GI-NEXT: mov b1, v0.b[2]
45+
; CHECK-GI-NEXT: mov b2, v0.b[1]
46+
; CHECK-GI-NEXT: mov b3, v0.b[3]
47+
; CHECK-GI-NEXT: fmov w8, s0
48+
; CHECK-GI-NEXT: fmov w9, s1
49+
; CHECK-GI-NEXT: uxtb w8, w8
50+
; CHECK-GI-NEXT: fmov w10, s2
51+
; CHECK-GI-NEXT: fmov w11, s3
52+
; CHECK-GI-NEXT: fmov s0, w8
53+
; CHECK-GI-NEXT: uxtb w9, w9
54+
; CHECK-GI-NEXT: uxtb w10, w10
55+
; CHECK-GI-NEXT: uxtb w11, w11
56+
; CHECK-GI-NEXT: fmov s1, w9
57+
; CHECK-GI-NEXT: mov v0.h[1], w10
58+
; CHECK-GI-NEXT: mov v1.h[1], w11
59+
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
60+
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
61+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
62+
; CHECK-GI-NEXT: ret
63+
%b = bitcast i32 %x to <4 x i8>
64+
%e = zext <4 x i8> %b to <4 x i32>
65+
ret <4 x i32> %e
66+
}
67+
68+
; Zero-extend of a bitcast scalar to 64-bit lanes: the i32 argument is
; reinterpreted as <4 x i8> and every lane is widened to i64 with zext.
; The <4 x i64> result does not fit one 128-bit register, so both expected
; sequences produce it as two halves in v0 and v1.
; The SelectionDAG expectation pulls each byte out with umov, rebuilds two
; 2-lane vectors, widens them with ushll and masks each lane to its low byte.
; The GlobalISel expectation zero-extends each byte in a general-purpose
; register with ubfx before inserting it into a 64-bit lane.
define <4 x i64> @z_i32_v4i64(i32 %x) {
69+
; CHECK-SD-LABEL: z_i32_v4i64:
70+
; CHECK-SD: // %bb.0:
71+
; CHECK-SD-NEXT: fmov s0, w0
72+
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
73+
; CHECK-SD-NEXT: umov w8, v0.b[2]
74+
; CHECK-SD-NEXT: umov w9, v0.b[0]
75+
; CHECK-SD-NEXT: umov w10, v0.b[3]
76+
; CHECK-SD-NEXT: umov w11, v0.b[1]
77+
; CHECK-SD-NEXT: fmov s0, w9
78+
; CHECK-SD-NEXT: fmov s2, w8
79+
; CHECK-SD-NEXT: mov v0.s[1], w11
80+
; CHECK-SD-NEXT: mov v2.s[1], w10
81+
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
82+
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
83+
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
84+
; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b
85+
; CHECK-SD-NEXT: ret
86+
;
87+
; CHECK-GI-LABEL: z_i32_v4i64:
88+
; CHECK-GI: // %bb.0:
89+
; CHECK-GI-NEXT: fmov s0, w0
90+
; CHECK-GI-NEXT: mov b1, v0.b[2]
91+
; CHECK-GI-NEXT: fmov w8, s0
92+
; CHECK-GI-NEXT: mov b2, v0.b[1]
93+
; CHECK-GI-NEXT: mov b3, v0.b[3]
94+
; CHECK-GI-NEXT: ubfx x8, x8, #0, #8
95+
; CHECK-GI-NEXT: fmov w9, s1
96+
; CHECK-GI-NEXT: mov v0.d[0], x8
97+
; CHECK-GI-NEXT: fmov w8, s2
98+
; CHECK-GI-NEXT: ubfx x9, x9, #0, #8
99+
; CHECK-GI-NEXT: ubfx x8, x8, #0, #8
100+
; CHECK-GI-NEXT: mov v1.d[0], x9
101+
; CHECK-GI-NEXT: fmov w9, s3
102+
; CHECK-GI-NEXT: mov v0.d[1], x8
103+
; CHECK-GI-NEXT: ubfx x9, x9, #0, #8
104+
; CHECK-GI-NEXT: mov v1.d[1], x9
105+
; CHECK-GI-NEXT: ret
106+
%b = bitcast i32 %x to <4 x i8>
107+
%e = zext <4 x i8> %b to <4 x i64>
108+
ret <4 x i64> %e
109+
}
110+
111+
; Sign-extend counterpart of z_i32_v4i16: the i32 argument is reinterpreted
; as <4 x i8> and every lane is widened to i16 with sext.
; The SelectionDAG expectation spreads the bytes with zip1 and then
; sign-extends in place with a shl #8 / sshr #8 pair on each halfword.
; The GlobalISel expectation moves the four bytes out one at a time, rebuilds
; the byte vector and widens it with sshll.
define <4 x i16> @s_i32_v4i16(i32 %x) {
112+
; CHECK-SD-LABEL: s_i32_v4i16:
113+
; CHECK-SD: // %bb.0:
114+
; CHECK-SD-NEXT: fmov s0, w0
115+
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
116+
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
117+
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
118+
; CHECK-SD-NEXT: ret
119+
;
120+
; CHECK-GI-LABEL: s_i32_v4i16:
121+
; CHECK-GI: // %bb.0:
122+
; CHECK-GI-NEXT: fmov s0, w0
123+
; CHECK-GI-NEXT: mov b1, v0.b[1]
124+
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
125+
; CHECK-GI-NEXT: mov b3, v0.b[2]
126+
; CHECK-GI-NEXT: mov b0, v0.b[3]
127+
; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
128+
; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
129+
; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
130+
; CHECK-GI-NEXT: sshll v0.8h, v2.8b, #0
131+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
132+
; CHECK-GI-NEXT: ret
133+
%b = bitcast i32 %x to <4 x i8>
134+
%e = sext <4 x i8> %b to <4 x i16>
135+
ret <4 x i16> %e
136+
}
137+
138+
; Sign-extend counterpart of z_i32_v4i32: the i32 argument is reinterpreted
; as <4 x i8> and every lane is widened to i32 with sext.
; The SelectionDAG expectation spreads the bytes with zip1, widens with ushll
; and sign-extends each 32-bit lane from its low byte with shl #24 / sshr #24.
; The GlobalISel expectation extracts the bytes individually, sign-extends
; them in general-purpose registers with sxtb and rebuilds the vector from two
; halves widened by sshll.
define <4 x i32> @s_i32_v4i32(i32 %x) {
139+
; CHECK-SD-LABEL: s_i32_v4i32:
140+
; CHECK-SD: // %bb.0:
141+
; CHECK-SD-NEXT: fmov s0, w0
142+
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
143+
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
144+
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #24
145+
; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #24
146+
; CHECK-SD-NEXT: ret
147+
;
148+
; CHECK-GI-LABEL: s_i32_v4i32:
149+
; CHECK-GI: // %bb.0:
150+
; CHECK-GI-NEXT: fmov s0, w0
151+
; CHECK-GI-NEXT: mov b1, v0.b[2]
152+
; CHECK-GI-NEXT: mov b2, v0.b[1]
153+
; CHECK-GI-NEXT: mov b3, v0.b[3]
154+
; CHECK-GI-NEXT: fmov w8, s0
155+
; CHECK-GI-NEXT: fmov w9, s1
156+
; CHECK-GI-NEXT: sxtb w8, w8
157+
; CHECK-GI-NEXT: fmov w10, s2
158+
; CHECK-GI-NEXT: fmov w11, s3
159+
; CHECK-GI-NEXT: fmov s0, w8
160+
; CHECK-GI-NEXT: sxtb w9, w9
161+
; CHECK-GI-NEXT: sxtb w10, w10
162+
; CHECK-GI-NEXT: sxtb w11, w11
163+
; CHECK-GI-NEXT: fmov s1, w9
164+
; CHECK-GI-NEXT: mov v0.h[1], w10
165+
; CHECK-GI-NEXT: mov v1.h[1], w11
166+
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
167+
; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
168+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
169+
; CHECK-GI-NEXT: ret
170+
%b = bitcast i32 %x to <4 x i8>
171+
%e = sext <4 x i8> %b to <4 x i32>
172+
ret <4 x i32> %e
173+
}
174+
175+
; Sign-extend counterpart of z_i32_v4i64: the i32 argument is reinterpreted
; as <4 x i8> and every lane is widened to i64 with sext.
; The <4 x i64> result does not fit one 128-bit register, so both expected
; sequences produce it as two halves in v0 and v1.
; The SelectionDAG expectation pulls each byte out with umov, rebuilds two
; 2-lane vectors, widens them with ushll and sign-extends each 64-bit lane
; from its low byte with shl #56 / sshr #56.
; The GlobalISel expectation sign-extends each byte in a general-purpose
; register with sxtb before inserting it into a 64-bit lane.
define <4 x i64> @s_i32_v4i64(i32 %x) {
176+
; CHECK-SD-LABEL: s_i32_v4i64:
177+
; CHECK-SD: // %bb.0:
178+
; CHECK-SD-NEXT: fmov s0, w0
179+
; CHECK-SD-NEXT: umov w8, v0.b[2]
180+
; CHECK-SD-NEXT: umov w9, v0.b[0]
181+
; CHECK-SD-NEXT: umov w10, v0.b[3]
182+
; CHECK-SD-NEXT: umov w11, v0.b[1]
183+
; CHECK-SD-NEXT: fmov s0, w9
184+
; CHECK-SD-NEXT: fmov s1, w8
185+
; CHECK-SD-NEXT: mov v0.s[1], w11
186+
; CHECK-SD-NEXT: mov v1.s[1], w10
187+
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
188+
; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
189+
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
190+
; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56
191+
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
192+
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #56
193+
; CHECK-SD-NEXT: ret
194+
;
195+
; CHECK-GI-LABEL: s_i32_v4i64:
196+
; CHECK-GI: // %bb.0:
197+
; CHECK-GI-NEXT: fmov s0, w0
198+
; CHECK-GI-NEXT: mov b1, v0.b[2]
199+
; CHECK-GI-NEXT: fmov w8, s0
200+
; CHECK-GI-NEXT: mov b2, v0.b[1]
201+
; CHECK-GI-NEXT: mov b3, v0.b[3]
202+
; CHECK-GI-NEXT: sxtb x8, w8
203+
; CHECK-GI-NEXT: fmov w9, s1
204+
; CHECK-GI-NEXT: mov v0.d[0], x8
205+
; CHECK-GI-NEXT: fmov w8, s2
206+
; CHECK-GI-NEXT: sxtb x9, w9
207+
; CHECK-GI-NEXT: sxtb x8, w8
208+
; CHECK-GI-NEXT: mov v1.d[0], x9
209+
; CHECK-GI-NEXT: fmov w9, s3
210+
; CHECK-GI-NEXT: mov v0.d[1], x8
211+
; CHECK-GI-NEXT: sxtb x9, w9
212+
; CHECK-GI-NEXT: mov v1.d[1], x9
213+
; CHECK-GI-NEXT: ret
214+
%b = bitcast i32 %x to <4 x i8>
215+
%e = sext <4 x i8> %b to <4 x i64>
216+
ret <4 x i64> %e
217+
}
218+
219+
; Multi-step zero-extend of the low four bytes of a sign-extended scalar.
; The i32 argument is sign-extended to i64, reinterpreted as <8 x i8> and
; zero-extended to <8 x i16>. The low four lanes (shuffle indices 0-3) are then
; zero-extended to <4 x i32>, split into a low pair (indices 0,1) and a high
; pair (indices 2,3), and each pair is zero-extended to <2 x i64>.
; The low pair is stored at %output and the high pair 16 bytes after it.
; The later zext steps carry nneg because their inputs were already produced
; by a zext from a narrower type.
; Both llc invocations at the top of the file share one expectation here:
; a chain of ushll / ushll2 widenings followed by a single stp.
define void @extractbitcastext(i32 %bytes, ptr %output) {
220+
; CHECK-LABEL: extractbitcastext:
221+
; CHECK: // %bb.0:
222+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
223+
; CHECK-NEXT: sxtw x8, w0
224+
; CHECK-NEXT: fmov d0, x8
225+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
226+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
227+
; CHECK-NEXT: ushll v1.2d, v0.2s, #0
228+
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
229+
; CHECK-NEXT: stp q1, q0, [x1]
230+
; CHECK-NEXT: ret
231+
%conv = sext i32 %bytes to i64
232+
%b0 = bitcast i64 %conv to <8 x i8>
233+
%b1 = zext <8 x i8> %b0 to <8 x i16>
234+
%shuffle.i = shufflevector <8 x i16> %b1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
235+
%z2 = zext nneg <4 x i16> %shuffle.i to <4 x i32>
236+
%shuffle.i23 = shufflevector <4 x i32> %z2, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
237+
%z3 = zext nneg <2 x i32> %shuffle.i23 to <2 x i64>
238+
%shuffle.i24 = shufflevector <4 x i32> %z2, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
239+
%z4 = zext nneg <2 x i32> %shuffle.i24 to <2 x i64>
240+
store <2 x i64> %z3, ptr %output, align 8
241+
%add.ptr = getelementptr inbounds nuw i8, ptr %output, i64 16
242+
store <2 x i64> %z4, ptr %add.ptr, align 8
243+
ret void
244+
}
245+
246+
; Sign-extend variant of the multi-step widening test.
; The i32 argument is sign-extended to i64, reinterpreted as <8 x i8> and
; sign-extended to <8 x i16>. The low four lanes (shuffle indices 0-3) are then
; sign-extended to <4 x i32>, split into a low pair (indices 0,1) and a high
; pair (indices 2,3), and each pair is sign-extended to <2 x i64>.
; The low pair is stored at %output and the high pair 16 bytes after it.
; Both llc invocations at the top of the file share one expectation here:
; a chain of sshll / sshll2 widenings followed by a single stp.
define void @extractbitcastext_s(i32 %bytes, ptr %output) {
247+
; CHECK-LABEL: extractbitcastext_s:
248+
; CHECK: // %bb.0:
249+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
250+
; CHECK-NEXT: sxtw x8, w0
251+
; CHECK-NEXT: fmov d0, x8
252+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
253+
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
254+
; CHECK-NEXT: sshll v1.2d, v0.2s, #0
255+
; CHECK-NEXT: sshll2 v0.2d, v0.4s, #0
256+
; CHECK-NEXT: stp q1, q0, [x1]
257+
; CHECK-NEXT: ret
258+
%conv = sext i32 %bytes to i64
259+
%b0 = bitcast i64 %conv to <8 x i8>
260+
%b1 = sext <8 x i8> %b0 to <8 x i16>
261+
%shuffle.i = shufflevector <8 x i16> %b1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
262+
%s2 = sext <4 x i16> %shuffle.i to <4 x i32>
263+
%shuffle.i23 = shufflevector <4 x i32> %s2, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
264+
%s3 = sext <2 x i32> %shuffle.i23 to <2 x i64>
265+
%shuffle.i24 = shufflevector <4 x i32> %s2, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
266+
%s4 = sext <2 x i32> %shuffle.i24 to <2 x i64>
267+
store <2 x i64> %s3, ptr %output, align 8
268+
%add.ptr = getelementptr inbounds nuw i8, ptr %output, i64 16
269+
store <2 x i64> %s4, ptr %add.ptr, align 8
270+
ret void
271+
}
272+
273+

0 commit comments

Comments
 (0)