11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
3-
4- ; rdar://12471808
2+ ; RUN: llc -mtriple=armv7-eabihf -mattr=+neon %s -o - | FileCheck %s
53
64define <8 x i8 > @v_bsli8 (ptr %A , ptr %B , ptr %C ) nounwind {
75; CHECK-LABEL: v_bsli8:
86; CHECK: @ %bb.0:
97; CHECK-NEXT: vldr d18, [r0]
108; CHECK-NEXT: vldr d16, [r2]
9+ ; CHECK-NEXT: vorr d0, d18, d18
1110; CHECK-NEXT: vldr d17, [r1]
12- ; CHECK-NEXT: vbit d16, d17, d18
13- ; CHECK-NEXT: vmov r0, r1, d16
14- ; CHECK-NEXT: mov pc, lr
11+ ; CHECK-NEXT: vbsl d0, d17, d16
12+ ; CHECK-NEXT: bx lr
1513 %tmp1 = load <8 x i8 >, ptr %A
1614 %tmp2 = load <8 x i8 >, ptr %B
1715 %tmp3 = load <8 x i8 >, ptr %C
@@ -27,10 +25,10 @@ define <4 x i16> @v_bsli16(ptr %A, ptr %B, ptr %C) nounwind {
2725; CHECK: @ %bb.0:
2826; CHECK-NEXT: vldr d18, [r0]
2927; CHECK-NEXT: vldr d16, [r2]
28+ ; CHECK-NEXT: vorr d0, d18, d18
3029; CHECK-NEXT: vldr d17, [r1]
31- ; CHECK-NEXT: vbit d16, d17, d18
32- ; CHECK-NEXT: vmov r0, r1, d16
33- ; CHECK-NEXT: mov pc, lr
30+ ; CHECK-NEXT: vbsl d0, d17, d16
31+ ; CHECK-NEXT: bx lr
3432 %tmp1 = load <4 x i16 >, ptr %A
3533 %tmp2 = load <4 x i16 >, ptr %B
3634 %tmp3 = load <4 x i16 >, ptr %C
@@ -46,10 +44,10 @@ define <2 x i32> @v_bsli32(ptr %A, ptr %B, ptr %C) nounwind {
4644; CHECK: @ %bb.0:
4745; CHECK-NEXT: vldr d18, [r0]
4846; CHECK-NEXT: vldr d16, [r2]
47+ ; CHECK-NEXT: vorr d0, d18, d18
4948; CHECK-NEXT: vldr d17, [r1]
50- ; CHECK-NEXT: vbit d16, d17, d18
51- ; CHECK-NEXT: vmov r0, r1, d16
52- ; CHECK-NEXT: mov pc, lr
49+ ; CHECK-NEXT: vbsl d0, d17, d16
50+ ; CHECK-NEXT: bx lr
5351 %tmp1 = load <2 x i32 >, ptr %A
5452 %tmp2 = load <2 x i32 >, ptr %B
5553 %tmp3 = load <2 x i32 >, ptr %C
@@ -65,10 +63,10 @@ define <1 x i64> @v_bsli64(ptr %A, ptr %B, ptr %C) nounwind {
6563; CHECK: @ %bb.0:
6664; CHECK-NEXT: vldr d18, [r0]
6765; CHECK-NEXT: vldr d16, [r2]
66+ ; CHECK-NEXT: vorr d0, d18, d18
6867; CHECK-NEXT: vldr d17, [r1]
69- ; CHECK-NEXT: vbit d16, d17, d18
70- ; CHECK-NEXT: vmov r0, r1, d16
71- ; CHECK-NEXT: mov pc, lr
68+ ; CHECK-NEXT: vbsl d0, d17, d16
69+ ; CHECK-NEXT: bx lr
7270 %tmp1 = load <1 x i64 >, ptr %A
7371 %tmp2 = load <1 x i64 >, ptr %B
7472 %tmp3 = load <1 x i64 >, ptr %C
@@ -83,12 +81,11 @@ define <16 x i8> @v_bslQi8(ptr %A, ptr %B, ptr %C) nounwind {
8381; CHECK-LABEL: v_bslQi8:
8482; CHECK: @ %bb.0:
8583; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
84+ ; CHECK-NEXT: vorr q0, q10, q10
8685; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
8786; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
88- ; CHECK-NEXT: vbit q8, q9, q10
89- ; CHECK-NEXT: vmov r0, r1, d16
90- ; CHECK-NEXT: vmov r2, r3, d17
91- ; CHECK-NEXT: mov pc, lr
87+ ; CHECK-NEXT: vbsl q0, q9, q8
88+ ; CHECK-NEXT: bx lr
9289 %tmp1 = load <16 x i8 >, ptr %A
9390 %tmp2 = load <16 x i8 >, ptr %B
9491 %tmp3 = load <16 x i8 >, ptr %C
@@ -103,12 +100,11 @@ define <8 x i16> @v_bslQi16(ptr %A, ptr %B, ptr %C) nounwind {
103100; CHECK-LABEL: v_bslQi16:
104101; CHECK: @ %bb.0:
105102; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
103+ ; CHECK-NEXT: vorr q0, q10, q10
106104; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
107105; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
108- ; CHECK-NEXT: vbit q8, q9, q10
109- ; CHECK-NEXT: vmov r0, r1, d16
110- ; CHECK-NEXT: vmov r2, r3, d17
111- ; CHECK-NEXT: mov pc, lr
106+ ; CHECK-NEXT: vbsl q0, q9, q8
107+ ; CHECK-NEXT: bx lr
112108 %tmp1 = load <8 x i16 >, ptr %A
113109 %tmp2 = load <8 x i16 >, ptr %B
114110 %tmp3 = load <8 x i16 >, ptr %C
@@ -123,12 +119,11 @@ define <4 x i32> @v_bslQi32(ptr %A, ptr %B, ptr %C) nounwind {
123119; CHECK-LABEL: v_bslQi32:
124120; CHECK: @ %bb.0:
125121; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
122+ ; CHECK-NEXT: vorr q0, q10, q10
126123; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
127124; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
128- ; CHECK-NEXT: vbit q8, q9, q10
129- ; CHECK-NEXT: vmov r0, r1, d16
130- ; CHECK-NEXT: vmov r2, r3, d17
131- ; CHECK-NEXT: mov pc, lr
125+ ; CHECK-NEXT: vbsl q0, q9, q8
126+ ; CHECK-NEXT: bx lr
132127 %tmp1 = load <4 x i32 >, ptr %A
133128 %tmp2 = load <4 x i32 >, ptr %B
134129 %tmp3 = load <4 x i32 >, ptr %C
@@ -143,12 +138,11 @@ define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
143138; CHECK-LABEL: v_bslQi64:
144139; CHECK: @ %bb.0:
145140; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
141+ ; CHECK-NEXT: vorr q0, q10, q10
146142; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
147143; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
148- ; CHECK-NEXT: vbit q8, q9, q10
149- ; CHECK-NEXT: vmov r0, r1, d16
150- ; CHECK-NEXT: vmov r2, r3, d17
151- ; CHECK-NEXT: mov pc, lr
144+ ; CHECK-NEXT: vbsl q0, q9, q8
145+ ; CHECK-NEXT: bx lr
152146 %tmp1 = load <2 x i64 >, ptr %A
153147 %tmp2 = load <2 x i64 >, ptr %B
154148 %tmp3 = load <2 x i64 >, ptr %C
@@ -162,179 +156,107 @@ define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
162156define <8 x i8 > @f1 (<8 x i8 > %a , <8 x i8 > %b , <8 x i8 > %c ) nounwind readnone optsize ssp {
163157; CHECK-LABEL: f1:
164158; CHECK: @ %bb.0:
165- ; CHECK-NEXT: vldr d16, [sp]
166- ; CHECK-NEXT: vmov d17, r2, r3
167- ; CHECK-NEXT: vmov d18, r0, r1
168- ; CHECK-NEXT: vbit d16, d17, d18
169- ; CHECK-NEXT: vmov r0, r1, d16
170- ; CHECK-NEXT: mov pc, lr
159+ ; CHECK-NEXT: vbsl d0, d1, d2
160+ ; CHECK-NEXT: bx lr
171161 %vbsl.i = tail call <8 x i8 > @llvm.arm.neon.vbsl.v8i8 (<8 x i8 > %a , <8 x i8 > %b , <8 x i8 > %c ) nounwind
172162 ret <8 x i8 > %vbsl.i
173163}
174164
175165define <4 x i16 > @f2 (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %c ) nounwind readnone optsize ssp {
176166; CHECK-LABEL: f2:
177167; CHECK: @ %bb.0:
178- ; CHECK-NEXT: vldr d16, [sp]
179- ; CHECK-NEXT: vmov d17, r2, r3
180- ; CHECK-NEXT: vmov d18, r0, r1
181- ; CHECK-NEXT: vbit d16, d17, d18
182- ; CHECK-NEXT: vmov r0, r1, d16
183- ; CHECK-NEXT: mov pc, lr
168+ ; CHECK-NEXT: vbsl d0, d1, d2
169+ ; CHECK-NEXT: bx lr
184170 %vbsl3.i = tail call <4 x i16 > @llvm.arm.neon.vbsl.v4i16 (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %c ) nounwind
185171 ret <4 x i16 > %vbsl3.i
186172}
187173
188174define <2 x i32 > @f3 (<2 x i32 > %a , <2 x i32 > %b , <2 x i32 > %c ) nounwind readnone optsize ssp {
189175; CHECK-LABEL: f3:
190176; CHECK: @ %bb.0:
191- ; CHECK-NEXT: vldr d16, [sp]
192- ; CHECK-NEXT: vmov d17, r2, r3
193- ; CHECK-NEXT: vmov d18, r0, r1
194- ; CHECK-NEXT: vbit d16, d17, d18
195- ; CHECK-NEXT: vmov r0, r1, d16
196- ; CHECK-NEXT: mov pc, lr
177+ ; CHECK-NEXT: vbsl d0, d1, d2
178+ ; CHECK-NEXT: bx lr
197179 %vbsl3.i = tail call <2 x i32 > @llvm.arm.neon.vbsl.v2i32 (<2 x i32 > %a , <2 x i32 > %b , <2 x i32 > %c ) nounwind
198180 ret <2 x i32 > %vbsl3.i
199181}
200182
201183define <2 x float > @f4 (<2 x float > %a , <2 x float > %b , <2 x float > %c ) nounwind readnone optsize ssp {
202184; CHECK-LABEL: f4:
203185; CHECK: @ %bb.0:
204- ; CHECK-NEXT: vldr d16, [sp]
205- ; CHECK-NEXT: vmov d17, r2, r3
206- ; CHECK-NEXT: vmov d18, r0, r1
207- ; CHECK-NEXT: vbit d16, d17, d18
208- ; CHECK-NEXT: vmov r0, r1, d16
209- ; CHECK-NEXT: mov pc, lr
186+ ; CHECK-NEXT: vbsl d0, d1, d2
187+ ; CHECK-NEXT: bx lr
210188 %vbsl4.i = tail call <2 x float > @llvm.arm.neon.vbsl.v2f32 (<2 x float > %a , <2 x float > %b , <2 x float > %c ) nounwind
211189 ret <2 x float > %vbsl4.i
212190}
213191
214192define <16 x i8 > @g1 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c ) nounwind readnone optsize ssp {
215193; CHECK-LABEL: g1:
216194; CHECK: @ %bb.0:
217- ; CHECK-NEXT: vmov d19, r2, r3
218- ; CHECK-NEXT: add r12, sp, #16
219- ; CHECK-NEXT: vmov d18, r0, r1
220- ; CHECK-NEXT: mov r0, sp
221- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
222- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
223- ; CHECK-NEXT: vbit q8, q10, q9
224- ; CHECK-NEXT: vmov r0, r1, d16
225- ; CHECK-NEXT: vmov r2, r3, d17
226- ; CHECK-NEXT: mov pc, lr
195+ ; CHECK-NEXT: vbsl q0, q1, q2
196+ ; CHECK-NEXT: bx lr
227197 %vbsl.i = tail call <16 x i8 > @llvm.arm.neon.vbsl.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c ) nounwind
228198 ret <16 x i8 > %vbsl.i
229199}
230200
231201define <8 x i16 > @g2 (<8 x i16 > %a , <8 x i16 > %b , <8 x i16 > %c ) nounwind readnone optsize ssp {
232202; CHECK-LABEL: g2:
233203; CHECK: @ %bb.0:
234- ; CHECK-NEXT: vmov d19, r2, r3
235- ; CHECK-NEXT: add r12, sp, #16
236- ; CHECK-NEXT: vmov d18, r0, r1
237- ; CHECK-NEXT: mov r0, sp
238- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
239- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
240- ; CHECK-NEXT: vbit q8, q10, q9
241- ; CHECK-NEXT: vmov r0, r1, d16
242- ; CHECK-NEXT: vmov r2, r3, d17
243- ; CHECK-NEXT: mov pc, lr
204+ ; CHECK-NEXT: vbsl q0, q1, q2
205+ ; CHECK-NEXT: bx lr
244206 %vbsl3.i = tail call <8 x i16 > @llvm.arm.neon.vbsl.v8i16 (<8 x i16 > %a , <8 x i16 > %b , <8 x i16 > %c ) nounwind
245207 ret <8 x i16 > %vbsl3.i
246208}
247209
248210define <4 x i32 > @g3 (<4 x i32 > %a , <4 x i32 > %b , <4 x i32 > %c ) nounwind readnone optsize ssp {
249211; CHECK-LABEL: g3:
250212; CHECK: @ %bb.0:
251- ; CHECK-NEXT: vmov d19, r2, r3
252- ; CHECK-NEXT: add r12, sp, #16
253- ; CHECK-NEXT: vmov d18, r0, r1
254- ; CHECK-NEXT: mov r0, sp
255- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
256- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
257- ; CHECK-NEXT: vbit q8, q10, q9
258- ; CHECK-NEXT: vmov r0, r1, d16
259- ; CHECK-NEXT: vmov r2, r3, d17
260- ; CHECK-NEXT: mov pc, lr
213+ ; CHECK-NEXT: vbsl q0, q1, q2
214+ ; CHECK-NEXT: bx lr
261215 %vbsl3.i = tail call <4 x i32 > @llvm.arm.neon.vbsl.v4i32 (<4 x i32 > %a , <4 x i32 > %b , <4 x i32 > %c ) nounwind
262216 ret <4 x i32 > %vbsl3.i
263217}
264218
265219define <4 x float > @g4 (<4 x float > %a , <4 x float > %b , <4 x float > %c ) nounwind readnone optsize ssp {
266220; CHECK-LABEL: g4:
267221; CHECK: @ %bb.0:
268- ; CHECK-NEXT: vmov d19, r2, r3
269- ; CHECK-NEXT: add r12, sp, #16
270- ; CHECK-NEXT: vmov d18, r0, r1
271- ; CHECK-NEXT: mov r0, sp
272- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
273- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
274- ; CHECK-NEXT: vbit q8, q10, q9
275- ; CHECK-NEXT: vmov r0, r1, d16
276- ; CHECK-NEXT: vmov r2, r3, d17
277- ; CHECK-NEXT: mov pc, lr
222+ ; CHECK-NEXT: vbsl q0, q1, q2
223+ ; CHECK-NEXT: bx lr
278224 %vbsl4.i = tail call <4 x float > @llvm.arm.neon.vbsl.v4f32 (<4 x float > %a , <4 x float > %b , <4 x float > %c ) nounwind
279225 ret <4 x float > %vbsl4.i
280226}
281227
282228define <1 x i64 > @test_vbsl_s64 (<1 x i64 > %a , <1 x i64 > %b , <1 x i64 > %c ) nounwind readnone optsize ssp {
283229; CHECK-LABEL: test_vbsl_s64:
284230; CHECK: @ %bb.0:
285- ; CHECK-NEXT: vldr d16, [sp]
286- ; CHECK-NEXT: vmov d17, r2, r3
287- ; CHECK-NEXT: vmov d18, r0, r1
288- ; CHECK-NEXT: vbit d16, d17, d18
289- ; CHECK-NEXT: vmov r0, r1, d16
290- ; CHECK-NEXT: mov pc, lr
231+ ; CHECK-NEXT: vbsl d0, d1, d2
232+ ; CHECK-NEXT: bx lr
291233 %vbsl3.i = tail call <1 x i64 > @llvm.arm.neon.vbsl.v1i64 (<1 x i64 > %a , <1 x i64 > %b , <1 x i64 > %c ) nounwind
292234 ret <1 x i64 > %vbsl3.i
293235}
294236
295237define <1 x i64 > @test_vbsl_u64 (<1 x i64 > %a , <1 x i64 > %b , <1 x i64 > %c ) nounwind readnone optsize ssp {
296238; CHECK-LABEL: test_vbsl_u64:
297239; CHECK: @ %bb.0:
298- ; CHECK-NEXT: vldr d16, [sp]
299- ; CHECK-NEXT: vmov d17, r2, r3
300- ; CHECK-NEXT: vmov d18, r0, r1
301- ; CHECK-NEXT: vbit d16, d17, d18
302- ; CHECK-NEXT: vmov r0, r1, d16
303- ; CHECK-NEXT: mov pc, lr
240+ ; CHECK-NEXT: vbsl d0, d1, d2
241+ ; CHECK-NEXT: bx lr
304242 %vbsl3.i = tail call <1 x i64 > @llvm.arm.neon.vbsl.v1i64 (<1 x i64 > %a , <1 x i64 > %b , <1 x i64 > %c ) nounwind
305243 ret <1 x i64 > %vbsl3.i
306244}
307245
308246define <2 x i64 > @test_vbslq_s64 (<2 x i64 > %a , <2 x i64 > %b , <2 x i64 > %c ) nounwind readnone optsize ssp {
309247; CHECK-LABEL: test_vbslq_s64:
310248; CHECK: @ %bb.0:
311- ; CHECK-NEXT: vmov d19, r2, r3
312- ; CHECK-NEXT: add r12, sp, #16
313- ; CHECK-NEXT: vmov d18, r0, r1
314- ; CHECK-NEXT: mov r0, sp
315- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
316- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
317- ; CHECK-NEXT: vbit q8, q10, q9
318- ; CHECK-NEXT: vmov r0, r1, d16
319- ; CHECK-NEXT: vmov r2, r3, d17
320- ; CHECK-NEXT: mov pc, lr
249+ ; CHECK-NEXT: vbsl q0, q1, q2
250+ ; CHECK-NEXT: bx lr
321251 %vbsl3.i = tail call <2 x i64 > @llvm.arm.neon.vbsl.v2i64 (<2 x i64 > %a , <2 x i64 > %b , <2 x i64 > %c ) nounwind
322252 ret <2 x i64 > %vbsl3.i
323253}
324254
325255define <2 x i64 > @test_vbslq_u64 (<2 x i64 > %a , <2 x i64 > %b , <2 x i64 > %c ) nounwind readnone optsize ssp {
326256; CHECK-LABEL: test_vbslq_u64:
327257; CHECK: @ %bb.0:
328- ; CHECK-NEXT: vmov d19, r2, r3
329- ; CHECK-NEXT: add r12, sp, #16
330- ; CHECK-NEXT: vmov d18, r0, r1
331- ; CHECK-NEXT: mov r0, sp
332- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
333- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
334- ; CHECK-NEXT: vbit q8, q10, q9
335- ; CHECK-NEXT: vmov r0, r1, d16
336- ; CHECK-NEXT: vmov r2, r3, d17
337- ; CHECK-NEXT: mov pc, lr
258+ ; CHECK-NEXT: vbsl q0, q1, q2
259+ ; CHECK-NEXT: bx lr
338260 %vbsl3.i = tail call <2 x i64 > @llvm.arm.neon.vbsl.v2i64 (<2 x i64 > %a , <2 x i64 > %b , <2 x i64 > %c ) nounwind
339261 ret <2 x i64 > %vbsl3.i
340262}
0 commit comments