Skip to content

Commit 09806bf

Browse files
committed
[AArch64] recognise zip1/zip2 with flipped operands
1 parent f65294e commit 09806bf

File tree

10 files changed

+203
-203
lines changed

10 files changed

+203
-203
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14722,9 +14722,12 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1472214722
}
1472314723

1472414724
unsigned WhichResult;
14725-
if (isZIPMask(ShuffleMask, NumElts, WhichResult)) {
14725+
unsigned OperandOrder;
14726+
if (isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
1472614727
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
14727-
return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
14728+
return DAG.getNode(Opc, DL, V1.getValueType(),
14729+
(OperandOrder == 0) ? V1 : V2,
14730+
(OperandOrder == 0) ? V2 : V1);
1472814731
}
1472914732
if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
1473014733
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
@@ -16446,7 +16449,7 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1644616449
isSingletonEXTMask(M, VT, DummyUnsigned) ||
1644716450
isTRNMask(M, NumElts, DummyUnsigned) ||
1644816451
isUZPMask(M, NumElts, DummyUnsigned) ||
16449-
isZIPMask(M, NumElts, DummyUnsigned) ||
16452+
isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
1645016453
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
1645116454
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
1645216455
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
@@ -31440,10 +31443,15 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
3144031443
}
3144131444

3144231445
unsigned WhichResult;
31443-
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult) &&
31446+
unsigned OperandOrder;
31447+
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult,
31448+
OperandOrder) &&
3144431449
WhichResult == 0)
3144531450
return convertFromScalableVector(
31446-
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
31451+
DAG, VT,
31452+
DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT,
31453+
OperandOrder == 0 ? Op1 : Op2,
31454+
OperandOrder == 0 ? Op2 : Op1));
3144731455

3144831456
if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
3144931457
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
@@ -31488,10 +31496,14 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
3148831496
return convertFromScalableVector(DAG, VT, Op);
3148931497
}
3149031498

31491-
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult) &&
31499+
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult,
31500+
OperandOrder) &&
3149231501
WhichResult != 0)
3149331502
return convertFromScalableVector(
31494-
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
31503+
DAG, VT,
31504+
DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT,
31505+
OperandOrder == 0 ? Op1 : Op2,
31506+
OperandOrder == 0 ? Op2 : Op1));
3149531507

3149631508
if (isUZPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
3149731509
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6623,34 +6623,49 @@ inline unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
66236623

66246624
/// Return true for zip1 or zip2 masks of the form:
66256625
/// <0, 8, 1, 9, 2, 10, 3, 11> or
6626-
/// <4, 12, 5, 13, 6, 14, 7, 15>
6626+
/// <4, 12, 5, 13, 6, 14, 7, 15> or
6627+
/// <8, 0, 9, 1, 10, 2, 11, 3> or
6628+
/// <12, 4, 13, 5, 14, 6, 15, 7>
66276629
inline bool isZIPMask(ArrayRef<int> M, unsigned NumElts,
6628-
unsigned &WhichResultOut) {
6630+
unsigned &WhichResultOut, unsigned &OperandOrderOut) {
66296631
if (NumElts % 2 != 0)
66306632
return false;
6631-
// Check the first non-undef element for which half to use.
6632-
unsigned WhichResult = 2;
6633-
for (unsigned i = 0; i != NumElts / 2; i++) {
6634-
if (M[i * 2] >= 0) {
6635-
WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
6636-
break;
6637-
} else if (M[i * 2 + 1] >= 0) {
6638-
WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
6639-
break;
6640-
}
6641-
}
6642-
if (WhichResult == 2)
6643-
return false;
66446633

6634+
// "Variant" refers to the distinction between zip1 and zip2, while
6635+
// "Order" refers to the sequence of input registers (matching vs flipped).
6636+
bool Variant0Order0 = true;
6637+
bool Variant1Order0 = true;
6638+
bool Variant0Order1 = true;
6639+
bool Variant1Order1 = true;
66456640
// Check all elements match.
6646-
unsigned Idx = WhichResult * NumElts / 2;
66476641
for (unsigned i = 0; i != NumElts; i += 2) {
6648-
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
6649-
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
6650-
return false;
6651-
Idx += 1;
6642+
if (M[i] >= 0) {
6643+
if ((unsigned)M[i] != i / 2)
6644+
Variant0Order0 = false;
6645+
if ((unsigned)M[i] != NumElts / 2 + i / 2)
6646+
Variant1Order0 = false;
6647+
if ((unsigned)M[i] != NumElts + i / 2)
6648+
Variant0Order1 = false;
6649+
if ((unsigned)M[i] != NumElts + NumElts / 2 + i / 2)
6650+
Variant1Order1 = false;
6651+
}
6652+
if (M[i + 1] >= 0) {
6653+
if ((unsigned)M[i + 1] != NumElts + i / 2)
6654+
Variant0Order0 = false;
6655+
if ((unsigned)M[i + 1] != NumElts + NumElts / 2 + i / 2)
6656+
Variant1Order0 = false;
6657+
if ((unsigned)M[i + 1] != i / 2)
6658+
Variant0Order1 = false;
6659+
if ((unsigned)M[i + 1] != NumElts / 2 + i / 2)
6660+
Variant1Order1 = false;
6661+
}
66526662
}
6653-
WhichResultOut = WhichResult;
6663+
6664+
if (Variant0Order0 + Variant1Order0 + Variant0Order1 + Variant1Order1 != 1)
6665+
return false;
6666+
6667+
WhichResultOut = (Variant0Order0 || Variant0Order1) ? 0 : 1;
6668+
OperandOrderOut = (Variant0Order0 || Variant1Order0) ? 0 : 1;
66546669
return true;
66556670
}
66566671

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6062,7 +6062,7 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
60626062
if (LT.second.isFixedLengthVector() &&
60636063
LT.second.getVectorNumElements() == Mask.size() &&
60646064
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
6065-
(isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6065+
(isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
60666066
isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
60676067
isREVMask(Mask, LT.second.getScalarSizeInBits(),
60686068
LT.second.getVectorNumElements(), 16) ||

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,10 +252,11 @@ bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
252252
ShuffleVectorPseudo &MatchInfo) {
253253
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
254254
unsigned WhichResult;
255+
unsigned OperandOrder;
255256
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
256257
Register Dst = MI.getOperand(0).getReg();
257258
unsigned NumElts = MRI.getType(Dst).getNumElements();
258-
if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
259+
if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
259260
return false;
260261
unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
261262
Register V1 = MI.getOperand(1).getReg();

llvm/test/CodeGen/AArch64/arm64-zip.ll

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -355,48 +355,38 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
355355
ret <8 x i16> %3
356356
}
357357

358-
; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
359358
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
360359
; CHECK-SD-LABEL: combine_v8i16_8first:
361360
; CHECK-SD: // %bb.0:
362-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
363-
; CHECK-SD-NEXT: adrp x8, .LCPI25_0
364-
; CHECK-SD-NEXT: fmov d2, d0
365-
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0]
366-
; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
361+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
362+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
363+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
367364
; CHECK-SD-NEXT: ret
368365
;
369366
; CHECK-GI-LABEL: combine_v8i16_8first:
370367
; CHECK-GI: // %bb.0:
371-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
372-
; CHECK-GI-NEXT: adrp x8, .LCPI25_0
373-
; CHECK-GI-NEXT: fmov d31, d1
374-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0]
375-
; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
368+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
369+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
370+
; CHECK-GI-NEXT: zip1.16b v0, v1, v0
376371
; CHECK-GI-NEXT: ret
377372
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
378373
ret <16 x i8> %3
379374
}
380375

381376

382-
; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
383377
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
384378
; CHECK-SD-LABEL: combine_v8i16_8firstundef:
385379
; CHECK-SD: // %bb.0:
386-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
387-
; CHECK-SD-NEXT: adrp x8, .LCPI26_0
388-
; CHECK-SD-NEXT: fmov d2, d0
389-
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
390-
; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
380+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
381+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
382+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
391383
; CHECK-SD-NEXT: ret
392384
;
393385
; CHECK-GI-LABEL: combine_v8i16_8firstundef:
394386
; CHECK-GI: // %bb.0:
395-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
396-
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
397-
; CHECK-GI-NEXT: fmov d31, d1
398-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
399-
; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
387+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
388+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
389+
; CHECK-GI-NEXT: zip1.16b v0, v1, v0
400390
; CHECK-GI-NEXT: ret
401391
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
402392
ret <16 x i8> %3

llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec
88
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
99
; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
1010
; CHECK-SD-NEXT: mov v1.16b, v2.16b
11+
; CHECK-SD-NEXT: zip1 v2.4h, v0.4h, v2.4h
1112
; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
12-
; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
13-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
13+
; CHECK-SD-NEXT: fmov d0, d2
1414
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
1515
; CHECK-SD-NEXT: ret
1616
;

llvm/test/CodeGen/AArch64/insert-extend.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -66,57 +66,57 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
6666
; CHECK-NEXT: ldr d5, [x11, x9]
6767
; CHECK-NEXT: shll2 v6.4s, v0.8h, #16
6868
; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b
69+
; CHECK-NEXT: shll2 v7.4s, v1.8h, #16
6970
; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b
70-
; CHECK-NEXT: shll2 v4.4s, v1.8h, #16
7171
; CHECK-NEXT: saddw v0.4s, v6.4s, v0.4h
72-
; CHECK-NEXT: shll2 v6.4s, v2.8h, #16
72+
; CHECK-NEXT: shll2 v4.4s, v2.8h, #16
73+
; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h
7374
; CHECK-NEXT: shll2 v5.4s, v3.8h, #16
74-
; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h
75-
; CHECK-NEXT: rev64 v4.4s, v0.4s
76-
; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h
75+
; CHECK-NEXT: rev64 v6.4s, v0.4s
76+
; CHECK-NEXT: saddw v2.4s, v4.4s, v2.4h
77+
; CHECK-NEXT: rev64 v7.4s, v1.4s
7778
; CHECK-NEXT: saddw v3.4s, v5.4s, v3.4h
78-
; CHECK-NEXT: rev64 v5.4s, v1.4s
79-
; CHECK-NEXT: rev64 v6.4s, v2.4s
80-
; CHECK-NEXT: sub v4.4s, v0.4s, v4.4s
79+
; CHECK-NEXT: rev64 v4.4s, v2.4s
80+
; CHECK-NEXT: sub v6.4s, v0.4s, v6.4s
8181
; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s
82-
; CHECK-NEXT: rev64 v7.4s, v3.4s
83-
; CHECK-NEXT: sub v5.4s, v1.4s, v5.4s
84-
; CHECK-NEXT: sub v6.4s, v2.4s, v6.4s
82+
; CHECK-NEXT: rev64 v5.4s, v3.4s
83+
; CHECK-NEXT: sub v7.4s, v1.4s, v7.4s
84+
; CHECK-NEXT: sub v4.4s, v2.4s, v4.4s
8585
; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
86-
; CHECK-NEXT: zip1 v16.4s, v5.4s, v4.4s
87-
; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
88-
; CHECK-NEXT: trn1 v4.4s, v5.4s, v4.4s
89-
; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s
90-
; CHECK-NEXT: mov v6.s[1], v7.s[0]
86+
; CHECK-NEXT: zip1 v16.4s, v7.4s, v6.4s
87+
; CHECK-NEXT: sub v5.4s, v3.4s, v5.4s
88+
; CHECK-NEXT: trn1 v3.4s, v7.4s, v6.4s
89+
; CHECK-NEXT: zip1 v6.4s, v4.4s, v5.4s
90+
; CHECK-NEXT: zip2 v4.4s, v4.4s, v5.4s
91+
; CHECK-NEXT: ext v5.16b, v7.16b, v16.16b, #8
9192
; CHECK-NEXT: ext v7.16b, v2.16b, v2.16b, #8
92-
; CHECK-NEXT: ext v5.16b, v5.16b, v16.16b, #8
93-
; CHECK-NEXT: mov v3.d[1], v4.d[1]
94-
; CHECK-NEXT: uzp1 v1.4s, v7.4s, v0.4s
95-
; CHECK-NEXT: uzp2 v4.4s, v7.4s, v0.4s
93+
; CHECK-NEXT: mov v4.d[1], v3.d[1]
9694
; CHECK-NEXT: mov v6.d[1], v5.d[1]
95+
; CHECK-NEXT: uzp1 v1.4s, v7.4s, v0.4s
96+
; CHECK-NEXT: uzp2 v3.4s, v7.4s, v0.4s
9797
; CHECK-NEXT: addp v0.4s, v2.4s, v0.4s
98-
; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
98+
; CHECK-NEXT: add v5.4s, v4.4s, v6.4s
99+
; CHECK-NEXT: sub v4.4s, v6.4s, v4.4s
100+
; CHECK-NEXT: sub v1.4s, v1.4s, v3.4s
99101
; CHECK-NEXT: rev64 v7.4s, v0.4s
100-
; CHECK-NEXT: add v5.4s, v3.4s, v6.4s
101-
; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s
102+
; CHECK-NEXT: rev64 v3.4s, v5.4s
103+
; CHECK-NEXT: rev64 v6.4s, v4.4s
102104
; CHECK-NEXT: rev64 v2.4s, v1.4s
103-
; CHECK-NEXT: rev64 v4.4s, v5.4s
104-
; CHECK-NEXT: rev64 v6.4s, v3.4s
105105
; CHECK-NEXT: addp v16.4s, v0.4s, v5.4s
106106
; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
107-
; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s
108-
; CHECK-NEXT: sub v4.4s, v5.4s, v4.4s
109-
; CHECK-NEXT: addp v5.4s, v1.4s, v3.4s
110-
; CHECK-NEXT: sub v3.4s, v3.4s, v6.4s
107+
; CHECK-NEXT: sub v3.4s, v5.4s, v3.4s
108+
; CHECK-NEXT: addp v5.4s, v1.4s, v4.4s
109+
; CHECK-NEXT: sub v4.4s, v4.4s, v6.4s
111110
; CHECK-NEXT: sub v1.4s, v1.4s, v2.4s
112111
; CHECK-NEXT: ext v7.16b, v0.16b, v16.16b, #4
113-
; CHECK-NEXT: ext v2.16b, v16.16b, v4.16b, #4
114-
; CHECK-NEXT: ext v6.16b, v5.16b, v3.16b, #4
115-
; CHECK-NEXT: mov v19.16b, v4.16b
112+
; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s
113+
; CHECK-NEXT: ext v2.16b, v16.16b, v3.16b, #4
114+
; CHECK-NEXT: ext v6.16b, v5.16b, v4.16b, #4
115+
; CHECK-NEXT: mov v19.16b, v3.16b
116116
; CHECK-NEXT: ext v17.16b, v1.16b, v5.16b, #8
117-
; CHECK-NEXT: mov v20.16b, v3.16b
118-
; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s
117+
; CHECK-NEXT: mov v20.16b, v4.16b
119118
; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4
119+
; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s
120120
; CHECK-NEXT: mov v19.s[2], v16.s[3]
121121
; CHECK-NEXT: zip2 v2.4s, v2.4s, v16.4s
122122
; CHECK-NEXT: zip2 v6.4s, v6.4s, v5.4s
@@ -125,8 +125,8 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
125125
; CHECK-NEXT: mov v1.s[2], v5.s[1]
126126
; CHECK-NEXT: mov v21.16b, v7.16b
127127
; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s
128-
; CHECK-NEXT: ext v2.16b, v4.16b, v2.16b, #12
129-
; CHECK-NEXT: ext v3.16b, v3.16b, v6.16b, #12
128+
; CHECK-NEXT: ext v2.16b, v3.16b, v2.16b, #12
129+
; CHECK-NEXT: ext v3.16b, v4.16b, v6.16b, #12
130130
; CHECK-NEXT: uzp2 v4.4s, v17.4s, v18.4s
131131
; CHECK-NEXT: mov v6.16b, v1.16b
132132
; CHECK-NEXT: mov v17.16b, v19.16b

0 commit comments

Comments
 (0)