Skip to content

Commit 06d2d1e

Browse files
authored
[ARM] Protect against odd sized vectors in isVTRNMask and friends (llvm#153413)
Fixes the issue reported in llvm#153138, where odd-sized vectors would cause the checks to iterate off the end of the mask.
1 parent bfd490e commit 06d2d1e

File tree

4 files changed

+204
-59
lines changed

4 files changed

+204
-59
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7406,7 +7406,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
74067406
return false;
74077407

74087408
unsigned NumElts = VT.getVectorNumElements();
7409-
if (M.size() != NumElts && M.size() != NumElts*2)
7409+
if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
74107410
return false;
74117411

74127412
// If the mask is twice as long as the input vector then we need to check the
@@ -7438,7 +7438,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
74387438
return false;
74397439

74407440
unsigned NumElts = VT.getVectorNumElements();
7441-
if (M.size() != NumElts && M.size() != NumElts*2)
7441+
if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
74427442
return false;
74437443

74447444
for (unsigned i = 0; i < M.size(); i += NumElts) {
@@ -7541,7 +7541,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
75417541
return false;
75427542

75437543
unsigned NumElts = VT.getVectorNumElements();
7544-
if (M.size() != NumElts && M.size() != NumElts*2)
7544+
if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
75457545
return false;
75467546

75477547
for (unsigned i = 0; i < M.size(); i += NumElts) {
@@ -7574,7 +7574,7 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
75747574
return false;
75757575

75767576
unsigned NumElts = VT.getVectorNumElements();
7577-
if (M.size() != NumElts && M.size() != NumElts*2)
7577+
if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
75787578
return false;
75797579

75807580
for (unsigned i = 0; i < M.size(); i += NumElts) {

llvm/test/CodeGen/ARM/vtrn.ll

Lines changed: 125 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
23

34
define <8 x i8> @vtrni8(ptr %A, ptr %B) nounwind {
@@ -20,11 +21,11 @@ define <8 x i8> @vtrni8(ptr %A, ptr %B) nounwind {
2021
define <16 x i8> @vtrni8_Qres(ptr %A, ptr %B) nounwind {
2122
; CHECK-LABEL: vtrni8_Qres:
2223
; CHECK: @ %bb.0:
23-
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
24-
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
25-
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
26-
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
27-
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
24+
; CHECK-NEXT: vldr d16, [r1]
25+
; CHECK-NEXT: vldr d17, [r0]
26+
; CHECK-NEXT: vtrn.8 d17, d16
27+
; CHECK-NEXT: vmov r0, r1, d17
28+
; CHECK-NEXT: vmov r2, r3, d16
2829
; CHECK-NEXT: mov pc, lr
2930
%tmp1 = load <8 x i8>, ptr %A
3031
%tmp2 = load <8 x i8>, ptr %B
@@ -52,11 +53,11 @@ define <4 x i16> @vtrni16(ptr %A, ptr %B) nounwind {
5253
define <8 x i16> @vtrni16_Qres(ptr %A, ptr %B) nounwind {
5354
; CHECK-LABEL: vtrni16_Qres:
5455
; CHECK: @ %bb.0:
55-
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
56-
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
57-
; CHECK-NEXT: vtrn.16 [[LDR0]], [[LDR1]]
58-
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
59-
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
56+
; CHECK-NEXT: vldr d16, [r1]
57+
; CHECK-NEXT: vldr d17, [r0]
58+
; CHECK-NEXT: vtrn.16 d17, d16
59+
; CHECK-NEXT: vmov r0, r1, d17
60+
; CHECK-NEXT: vmov r2, r3, d16
6061
; CHECK-NEXT: mov pc, lr
6162
%tmp1 = load <4 x i16>, ptr %A
6263
%tmp2 = load <4 x i16>, ptr %B
@@ -84,11 +85,11 @@ define <2 x i32> @vtrni32(ptr %A, ptr %B) nounwind {
8485
define <4 x i32> @vtrni32_Qres(ptr %A, ptr %B) nounwind {
8586
; CHECK-LABEL: vtrni32_Qres:
8687
; CHECK: @ %bb.0:
87-
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
88-
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
89-
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
90-
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
91-
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
88+
; CHECK-NEXT: vldr d16, [r1]
89+
; CHECK-NEXT: vldr d17, [r0]
90+
; CHECK-NEXT: vtrn.32 d17, d16
91+
; CHECK-NEXT: vmov r0, r1, d17
92+
; CHECK-NEXT: vmov r2, r3, d16
9293
; CHECK-NEXT: mov pc, lr
9394
%tmp1 = load <2 x i32>, ptr %A
9495
%tmp2 = load <2 x i32>, ptr %B
@@ -116,11 +117,11 @@ define <2 x float> @vtrnf(ptr %A, ptr %B) nounwind {
116117
define <4 x float> @vtrnf_Qres(ptr %A, ptr %B) nounwind {
117118
; CHECK-LABEL: vtrnf_Qres:
118119
; CHECK: @ %bb.0:
119-
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
120-
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
121-
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
122-
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
123-
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
120+
; CHECK-NEXT: vldr d16, [r1]
121+
; CHECK-NEXT: vldr d17, [r0]
122+
; CHECK-NEXT: vtrn.32 d17, d16
123+
; CHECK-NEXT: vmov r0, r1, d17
124+
; CHECK-NEXT: vmov r2, r3, d16
124125
; CHECK-NEXT: mov pc, lr
125126
%tmp1 = load <2 x float>, ptr %A
126127
%tmp2 = load <2 x float>, ptr %B
@@ -281,11 +282,11 @@ define <8 x i8> @vtrni8_undef(ptr %A, ptr %B) nounwind {
281282
define <16 x i8> @vtrni8_undef_Qres(ptr %A, ptr %B) nounwind {
282283
; CHECK-LABEL: vtrni8_undef_Qres:
283284
; CHECK: @ %bb.0:
284-
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
285-
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
286-
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
287-
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
288-
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
285+
; CHECK-NEXT: vldr d16, [r1]
286+
; CHECK-NEXT: vldr d17, [r0]
287+
; CHECK-NEXT: vtrn.8 d17, d16
288+
; CHECK-NEXT: vmov r0, r1, d17
289+
; CHECK-NEXT: vmov r2, r3, d16
289290
; CHECK-NEXT: mov pc, lr
290291
%tmp1 = load <8 x i8>, ptr %A
291292
%tmp2 = load <8 x i8>, ptr %B
@@ -327,9 +328,15 @@ define <16 x i16> @vtrnQi16_undef_QQres(ptr %A, ptr %B) nounwind {
327328
}
328329

329330
define <8 x i16> @vtrn_lower_shufflemask_undef(ptr %A, ptr %B) {
331+
; CHECK-LABEL: vtrn_lower_shufflemask_undef:
332+
; CHECK: @ %bb.0: @ %entry
333+
; CHECK-NEXT: vldr d16, [r1]
334+
; CHECK-NEXT: vldr d17, [r0]
335+
; CHECK-NEXT: vtrn.16 d17, d16
336+
; CHECK-NEXT: vmov r0, r1, d16
337+
; CHECK-NEXT: vmov r2, r3, d16
338+
; CHECK-NEXT: mov pc, lr
330339
entry:
331-
; CHECK-LABEL: vtrn_lower_shufflemask_undef
332-
; CHECK: vtrn
333340
%tmp1 = load <4 x i16>, ptr %A
334341
%tmp2 = load <4 x i16>, ptr %B
335342
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
@@ -340,12 +347,26 @@ entry:
340347
; values do modify the type. However, we get different input types, as some of
341348
; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
342349
; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
343-
define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
344-
<4 x i32> %cmp0, <4 x i32> %cmp1,
345-
<4 x i16> %cmp2, <4 x i16> %cmp3) {
346-
; CHECK-LABEL: vtrn_mismatched_builvector0:
347-
; CHECK: vmovn.i32
348-
; CHECK: vbsl
350+
define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1, <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i16> %cmp2, <4 x i16> %cmp3) {
351+
; CHECK-LABEL: vtrn_mismatched_builvector0:
352+
; CHECK: @ %bb.0:
353+
; CHECK-NEXT: mov r12, sp
354+
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
355+
; CHECK-NEXT: add r12, sp, #16
356+
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
357+
; CHECK-NEXT: vcgt.u32 q8, q9, q8
358+
; CHECK-NEXT: vldr d20, [sp, #32]
359+
; CHECK-NEXT: vldr d18, [sp, #40]
360+
; CHECK-NEXT: vcgt.u16 d18, d18, d20
361+
; CHECK-NEXT: vmovn.i32 d16, q8
362+
; CHECK-NEXT: vmov d17, r2, r3
363+
; CHECK-NEXT: vtrn.8 d16, d18
364+
; CHECK-NEXT: vmov d18, r0, r1
365+
; CHECK-NEXT: vshl.i8 d16, d16, #7
366+
; CHECK-NEXT: vshr.s8 d16, d16, #7
367+
; CHECK-NEXT: vbsl d16, d18, d17
368+
; CHECK-NEXT: vmov r0, r1, d16
369+
; CHECK-NEXT: mov pc, lr
349370
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
350371
%c1 = icmp ult <4 x i16> %cmp2, %cmp3
351372
%c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -356,12 +377,30 @@ define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
356377
; Here we get a build_vector node, where half the incoming extract_element
357378
; values do not modify the type (the values from cmp2), but half of them do
358379
; (from the icmp operation).
359-
define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
360-
<4 x i32> %cmp0, <4 x i32> %cmp1, ptr %cmp2_ptr) {
361-
; CHECK-LABEL: vtrn_mismatched_builvector1:
362-
; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
363-
; CHECK: vmovl
364-
; CHECK: vbsl
380+
; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
381+
define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1, <4 x i32> %cmp0, <4 x i32> %cmp1, ptr %cmp2_ptr) {
382+
; CHECK-LABEL: vtrn_mismatched_builvector1:
383+
; CHECK: @ %bb.0:
384+
; CHECK-NEXT: .save {r11, lr}
385+
; CHECK-NEXT: push {r11, lr}
386+
; CHECK-NEXT: add r12, sp, #8
387+
; CHECK-NEXT: add lr, sp, #24
388+
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
389+
; CHECK-NEXT: ldr r12, [sp, #40]
390+
; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
391+
; CHECK-NEXT: vcgt.u32 q8, q9, q8
392+
; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
393+
; CHECK-NEXT: vmovl.u8 q9, d18
394+
; CHECK-NEXT: vmovn.i32 d16, q8
395+
; CHECK-NEXT: vmov d17, r2, r3
396+
; CHECK-NEXT: vtrn.8 d16, d18
397+
; CHECK-NEXT: vmov d18, r0, r1
398+
; CHECK-NEXT: vshl.i8 d16, d16, #7
399+
; CHECK-NEXT: vshr.s8 d16, d16, #7
400+
; CHECK-NEXT: vbsl d16, d18, d17
401+
; CHECK-NEXT: vmov r0, r1, d16
402+
; CHECK-NEXT: pop {r11, lr}
403+
; CHECK-NEXT: mov pc, lr
365404
%cmp2_load = load <4 x i8>, ptr %cmp2_ptr, align 4
366405
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
367406
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
@@ -373,15 +412,15 @@ define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
373412
; The shuffle mask is half a vtrn; we duplicate the half to produce the
374413
; full result.
375414
define void @lower_twice_no_vtrn(ptr %A, ptr %B, ptr %C) {
415+
; CHECK-LABEL: lower_twice_no_vtrn:
416+
; CHECK: @ %bb.0: @ %entry
417+
; CHECK-NEXT: vldr d16, [r1]
418+
; CHECK-NEXT: vldr d18, [r0]
419+
; CHECK-NEXT: vtrn.16 d18, d16
420+
; CHECK-NEXT: vorr d17, d16, d16
421+
; CHECK-NEXT: vst1.64 {d16, d17}, [r2]
422+
; CHECK-NEXT: mov pc, lr
376423
entry:
377-
; CHECK-LABEL: lower_twice_no_vtrn:
378-
; CHECK: @ %bb.0:
379-
; CHECK-NEXT: vldr d16, [r1]
380-
; CHECK-NEXT: vldr d18, [r0]
381-
; CHECK-NEXT: vtrn.16 d18, d16
382-
; CHECK-NEXT: vorr d17, d16, d16
383-
; CHECK-NEXT: vst1.64 {d16, d17}, [r2]
384-
; CHECK-NEXT: mov pc, lr
385424
%tmp1 = load <4 x i16>, ptr %A
386425
%tmp2 = load <4 x i16>, ptr %B
387426
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
@@ -392,18 +431,49 @@ entry:
392431
; The shuffle mask is half a vtrn; we duplicate the half to produce the
393432
; full result.
394433
define void @upper_twice_no_vtrn(ptr %A, ptr %B, ptr %C) {
434+
; CHECK-LABEL: upper_twice_no_vtrn:
435+
; CHECK: @ %bb.0: @ %entry
436+
; CHECK-NEXT: vldr d16, [r1]
437+
; CHECK-NEXT: vldr d18, [r0]
438+
; CHECK-NEXT: vtrn.16 d18, d16
439+
; CHECK-NEXT: vorr d19, d18, d18
440+
; CHECK-NEXT: vst1.64 {d18, d19}, [r2]
441+
; CHECK-NEXT: mov pc, lr
395442
entry:
396-
; CHECK-LABEL: upper_twice_no_vtrn:
397-
; CHECK: @ %bb.0:
398-
; CHECK-NEXT: vldr d16, [r1]
399-
; CHECK-NEXT: vldr d18, [r0]
400-
; CHECK-NEXT: vtrn.16 d18, d16
401-
; CHECK-NEXT: vorr d19, d18, d18
402-
; CHECK-NEXT: vst1.64 {d18, d19}, [r2]
403-
; CHECK-NEXT: mov pc, lr
404443
%tmp1 = load <4 x i16>, ptr %A
405444
%tmp2 = load <4 x i16>, ptr %B
406445
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
407446
store <8 x i16> %0, ptr %C
408447
ret void
409448
}
449+
450+
define void @test_15xi16(ptr %next.gep, ptr %next.gep13) {
451+
; CHECK-LABEL: test_15xi16:
452+
; CHECK: @ %bb.0:
453+
; CHECK-NEXT: add r2, r0, #2
454+
; CHECK-NEXT: add r3, r0, #6
455+
; CHECK-NEXT: vld1.16 {d16, d17}, [r2]!
456+
; CHECK-NEXT: vld1.16 {d18}, [r2]!
457+
; CHECK-NEXT: vld1.16 {d20, d21}, [r3]!
458+
; CHECK-NEXT: ldr r2, [r2]
459+
; CHECK-NEXT: vld1.16 {d22}, [r3]!
460+
; CHECK-NEXT: vmov.16 d19[0], r2
461+
; CHECK-NEXT: ldr r3, [r3]
462+
; CHECK-NEXT: add r2, r0, #30
463+
; CHECK-NEXT: add r0, r0, #34
464+
; CHECK-NEXT: vmov.16 d19[1], r3
465+
; CHECK-NEXT: vld1.16 {d19[2]}, [r2:16]
466+
; CHECK-NEXT: vtrn.16 q8, q10
467+
; CHECK-NEXT: vld1.16 {d19[3]}, [r0:16]
468+
; CHECK-NEXT: vtrn.16 d18, d22
469+
; CHECK-NEXT: vst1.16 {d16, d17}, [r1]!
470+
; CHECK-NEXT: vst1.16 {d18, d19}, [r1]
471+
; CHECK-NEXT: mov pc, lr
472+
%a = getelementptr inbounds nuw i8, ptr %next.gep, i32 2
473+
%b = load <15 x i16>, ptr %a, align 2
474+
%c = getelementptr inbounds nuw i8, ptr %next.gep, i32 6
475+
%d = load <15 x i16>, ptr %c, align 2
476+
%interleaved.vec = shufflevector <15 x i16> %b, <15 x i16> %d, <16 x i32> <i32 0, i32 15, i32 2, i32 17, i32 4, i32 19, i32 6, i32 21, i32 8, i32 23, i32 10, i32 25, i32 12, i32 27, i32 14, i32 29>
477+
store <16 x i16> %interleaved.vec, ptr %next.gep13, align 2
478+
ret void
479+
}

llvm/test/CodeGen/ARM/vuzp.ll

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,3 +535,59 @@ define %struct.uint8x8x2_t @vuzp_extract_subvector(<16 x i8> %t) #0 {
535535
%.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
536536
ret %struct.uint8x8x2_t %.fca.0.1.insert
537537
}
538+
539+
define void @test_15xi16(ptr %next.gep, ptr %next.gep13) {
540+
; CHECK-LABEL: test_15xi16:
541+
; CHECK: @ %bb.0:
542+
; CHECK-NEXT: .save {r4, r5, r6, lr}
543+
; CHECK-NEXT: push {r4, r5, r6, lr}
544+
; CHECK-NEXT: add r2, r0, #2
545+
; CHECK-NEXT: add r3, r0, #6
546+
; CHECK-NEXT: vld1.16 {d20, d21}, [r2]!
547+
; CHECK-NEXT: vld1.16 {d16}, [r2]!
548+
; CHECK-NEXT: vmov.u16 r12, d16[0]
549+
; CHECK-NEXT: ldr r2, [r2]
550+
; CHECK-NEXT: vmov.u16 r4, d20[0]
551+
; CHECK-NEXT: vld1.16 {d22, d23}, [r3]!
552+
; CHECK-NEXT: vld1.16 {d24}, [r3]!
553+
; CHECK-NEXT: vmov.u16 lr, d16[2]
554+
; CHECK-NEXT: vmov.u16 r5, d22[0]
555+
; CHECK-NEXT: vmov.u16 r6, d21[0]
556+
; CHECK-NEXT: vmov.16 d17[0], r12
557+
; CHECK-NEXT: vmov.16 d16[0], r4
558+
; CHECK-NEXT: vmov.u16 r4, d24[0]
559+
; CHECK-NEXT: vmov.u16 r12, d24[2]
560+
; CHECK-NEXT: vmov.16 d17[1], lr
561+
; CHECK-NEXT: vmov.16 d18[0], r5
562+
; CHECK-NEXT: vmov.u16 r5, d20[2]
563+
; CHECK-NEXT: vmov.u16 lr, d23[0]
564+
; CHECK-NEXT: vmov.16 d19[0], r4
565+
; CHECK-NEXT: vmov.u16 r4, d22[2]
566+
; CHECK-NEXT: vmov.16 d16[1], r5
567+
; CHECK-NEXT: vmov.u16 r5, d21[2]
568+
; CHECK-NEXT: vmov.16 d17[2], r2
569+
; CHECK-NEXT: ldr r2, [r3]
570+
; CHECK-NEXT: vmov.16 d16[2], r6
571+
; CHECK-NEXT: vmov.16 d18[1], r4
572+
; CHECK-NEXT: vmov.u16 r4, d23[2]
573+
; CHECK-NEXT: vmov.16 d19[1], r12
574+
; CHECK-NEXT: vmov.16 d18[2], lr
575+
; CHECK-NEXT: vmov.16 d19[2], r2
576+
; CHECK-NEXT: add r2, r0, #30
577+
; CHECK-NEXT: add r0, r0, #34
578+
; CHECK-NEXT: vld1.16 {d17[3]}, [r2:16]
579+
; CHECK-NEXT: vmov.16 d16[3], r5
580+
; CHECK-NEXT: vmov.16 d18[3], r4
581+
; CHECK-NEXT: vld1.16 {d19[3]}, [r0:16]
582+
; CHECK-NEXT: vst1.16 {d16, d17}, [r1]!
583+
; CHECK-NEXT: vst1.16 {d18, d19}, [r1]
584+
; CHECK-NEXT: pop {r4, r5, r6, lr}
585+
; CHECK-NEXT: mov pc, lr
586+
%a = getelementptr inbounds nuw i8, ptr %next.gep, i32 2
587+
%b = load <15 x i16>, ptr %a, align 2
588+
%c = getelementptr inbounds nuw i8, ptr %next.gep, i32 6
589+
%d = load <15 x i16>, ptr %c, align 2
590+
%interleaved.vec = shufflevector <15 x i16> %b, <15 x i16> %d, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29>
591+
store <16 x i16> %interleaved.vec, ptr %next.gep13, align 2
592+
ret void
593+
}

llvm/test/CodeGen/ARM/vzip.ll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,22 @@ entry:
381381
%vzip.i = shufflevector <8 x i8> %lane, <8 x i8> %lane3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
382382
ret <8 x i8> %vzip.i
383383
}
384+
385+
define <16 x i16> @test_15xi16(ptr %next.gep, ptr %next.gep13) {
386+
; CHECK-LABEL: test_15xi16:
387+
; CHECK: @ %bb.0:
388+
; CHECK-NEXT: add r1, r1, #2
389+
; CHECK-NEXT: mov r2, #4
390+
; CHECK-NEXT: vld1.16 {d16, d17}, [r1], r2
391+
; CHECK-NEXT: vld1.16 {d18, d19}, [r1]
392+
; CHECK-NEXT: vzip.16 q8, q9
393+
; CHECK-NEXT: vst1.16 {d16, d17}, [r0:128]!
394+
; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]
395+
; CHECK-NEXT: mov pc, lr
396+
%a = getelementptr inbounds nuw i8, ptr %next.gep, i32 2
397+
%b = load <15 x i16>, ptr %a, align 2
398+
%c = getelementptr inbounds nuw i8, ptr %next.gep, i32 6
399+
%d = load <15 x i16>, ptr %c, align 2
400+
%interleaved.vec = shufflevector <15 x i16> %b, <15 x i16> %d, <16 x i32> <i32 0, i32 15, i32 1, i32 16, i32 2, i32 17, i32 3, i32 18, i32 4, i32 19, i32 5, i32 20, i32 6, i32 21, i32 7, i32 22>
401+
ret <16 x i16> %interleaved.vec
402+
}

0 commit comments

Comments
 (0)