Skip to content

Commit b2ebf12

Browse files
committed
[AArch64] recognise zip1/zip2 with flipped operands
1 parent cc3a505 commit b2ebf12

File tree

10 files changed

+141
-141
lines changed

10 files changed

+141
-141
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14582,9 +14582,12 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1458214582
}
1458314583

1458414584
unsigned WhichResult;
14585-
if (isZIPMask(ShuffleMask, NumElts, WhichResult)) {
14585+
unsigned OperandOrder;
14586+
if (isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
1458614587
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
14587-
return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
14588+
return DAG.getNode(Opc, DL, V1.getValueType(),
14589+
(OperandOrder == 0) ? V1 : V2,
14590+
(OperandOrder == 0) ? V2 : V1);
1458814591
}
1458914592
if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
1459014593
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
@@ -16306,7 +16309,7 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1630616309
isSingletonEXTMask(M, VT, DummyUnsigned) ||
1630716310
isTRNMask(M, NumElts, DummyUnsigned) ||
1630816311
isUZPMask(M, NumElts, DummyUnsigned) ||
16309-
isZIPMask(M, NumElts, DummyUnsigned) ||
16312+
isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
1631016313
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
1631116314
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
1631216315
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
@@ -31278,10 +31281,15 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
3127831281
}
3127931282

3128031283
unsigned WhichResult;
31281-
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult) &&
31284+
unsigned OperandOrder;
31285+
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult,
31286+
OperandOrder) &&
3128231287
WhichResult == 0)
3128331288
return convertFromScalableVector(
31284-
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
31289+
DAG, VT,
31290+
DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT,
31291+
OperandOrder == 0 ? Op1 : Op2,
31292+
OperandOrder == 0 ? Op2 : Op1));
3128531293

3128631294
if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
3128731295
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
@@ -31326,10 +31334,14 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
3132631334
return convertFromScalableVector(DAG, VT, Op);
3132731335
}
3132831336

31329-
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult) &&
31337+
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult,
31338+
OperandOrder) &&
3133031339
WhichResult != 0)
3133131340
return convertFromScalableVector(
31332-
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
31341+
DAG, VT,
31342+
DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT,
31343+
OperandOrder == 0 ? Op1 : Op2,
31344+
OperandOrder == 0 ? Op2 : Op1));
3133331345

3133431346
if (isUZPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
3133531347
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6623,34 +6623,49 @@ inline unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
66236623

66246624
/// Return true for zip1 or zip2 masks of the form:
66256625
/// <0, 8, 1, 9, 2, 10, 3, 11> or
6626-
/// <4, 12, 5, 13, 6, 14, 7, 15>
6626+
/// <4, 12, 5, 13, 6, 14, 7, 15> or
6627+
/// <8, 0, 9, 1, 10, 2, 11, 3> or
6628+
/// <12, 4, 13, 5, 14, 6, 15, 7>
66276629
inline bool isZIPMask(ArrayRef<int> M, unsigned NumElts,
6628-
unsigned &WhichResultOut) {
6630+
unsigned &WhichResultOut, unsigned &OperandOrderOut) {
66296631
if (NumElts % 2 != 0)
66306632
return false;
6631-
// Check the first non-undef element for which half to use.
6632-
unsigned WhichResult = 2;
6633-
for (unsigned i = 0; i != NumElts / 2; i++) {
6634-
if (M[i * 2] >= 0) {
6635-
WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
6636-
break;
6637-
} else if (M[i * 2 + 1] >= 0) {
6638-
WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
6639-
break;
6640-
}
6641-
}
6642-
if (WhichResult == 2)
6643-
return false;
66446633

6634+
// "Variant" refers to the distinction between zip1 and zip2, while
6635+
// "Order" refers to the sequence of the input registers (matching vs. flipped).
6636+
bool Variant0Order0 = true;
6637+
bool Variant1Order0 = true;
6638+
bool Variant0Order1 = true;
6639+
bool Variant1Order1 = true;
66456640
// Check all elements match.
6646-
unsigned Idx = WhichResult * NumElts / 2;
66476641
for (unsigned i = 0; i != NumElts; i += 2) {
6648-
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
6649-
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
6650-
return false;
6651-
Idx += 1;
6642+
if (M[i] >= 0) {
6643+
if ((unsigned)M[i] != i / 2)
6644+
Variant0Order0 = false;
6645+
if ((unsigned)M[i] != NumElts / 2 + i / 2)
6646+
Variant1Order0 = false;
6647+
if ((unsigned)M[i] != NumElts + i / 2)
6648+
Variant0Order1 = false;
6649+
if ((unsigned)M[i] != NumElts + NumElts / 2 + i / 2)
6650+
Variant1Order1 = false;
6651+
}
6652+
if (M[i + 1] >= 0) {
6653+
if ((unsigned)M[i + 1] != NumElts + i / 2)
6654+
Variant0Order0 = false;
6655+
if ((unsigned)M[i + 1] != NumElts + NumElts / 2 + i / 2)
6656+
Variant1Order0 = false;
6657+
if ((unsigned)M[i + 1] != i / 2)
6658+
Variant0Order1 = false;
6659+
if ((unsigned)M[i + 1] != NumElts / 2 + i / 2)
6660+
Variant1Order1 = false;
6661+
}
66526662
}
6653-
WhichResultOut = WhichResult;
6663+
6664+
if (Variant0Order0 + Variant1Order0 + Variant0Order1 + Variant1Order1 != 1)
6665+
return false;
6666+
6667+
WhichResultOut = (Variant0Order0 || Variant0Order1) ? 0 : 1;
6668+
OperandOrderOut = (Variant0Order0 || Variant1Order0) ? 0 : 1;
66546669
return true;
66556670
}
66566671

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6041,7 +6041,7 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
60416041
if (LT.second.isFixedLengthVector() &&
60426042
LT.second.getVectorNumElements() == Mask.size() &&
60436043
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
6044-
(isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6044+
(isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
60456045
isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
60466046
isREVMask(Mask, LT.second.getScalarSizeInBits(),
60476047
LT.second.getVectorNumElements(), 16) ||

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,10 +252,11 @@ bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
252252
ShuffleVectorPseudo &MatchInfo) {
253253
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
254254
unsigned WhichResult;
255+
unsigned OperandOrder;
255256
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
256257
Register Dst = MI.getOperand(0).getReg();
257258
unsigned NumElts = MRI.getType(Dst).getNumElements();
258-
if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
259+
if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
259260
return false;
260261
unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
261262
Register V1 = MI.getOperand(1).getReg();

llvm/test/CodeGen/AArch64/arm64-zip.ll

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -355,48 +355,38 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
355355
ret <8 x i16> %3
356356
}
357357

358-
; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
359358
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
360359
; CHECK-SD-LABEL: combine_v8i16_8first:
361360
; CHECK-SD: // %bb.0:
362-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
363-
; CHECK-SD-NEXT: adrp x8, .LCPI25_0
364-
; CHECK-SD-NEXT: fmov d2, d0
365-
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0]
366-
; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
361+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
362+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
363+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
367364
; CHECK-SD-NEXT: ret
368365
;
369366
; CHECK-GI-LABEL: combine_v8i16_8first:
370367
; CHECK-GI: // %bb.0:
371-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
372-
; CHECK-GI-NEXT: adrp x8, .LCPI25_0
373-
; CHECK-GI-NEXT: fmov d31, d1
374-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0]
375-
; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
368+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
369+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
370+
; CHECK-GI-NEXT: zip1.16b v0, v1, v0
376371
; CHECK-GI-NEXT: ret
377372
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
378373
ret <16 x i8> %3
379374
}
380375

381376

382-
; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
383377
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
384378
; CHECK-SD-LABEL: combine_v8i16_8firstundef:
385379
; CHECK-SD: // %bb.0:
386-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
387-
; CHECK-SD-NEXT: adrp x8, .LCPI26_0
388-
; CHECK-SD-NEXT: fmov d2, d0
389-
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
390-
; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
380+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
381+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
382+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
391383
; CHECK-SD-NEXT: ret
392384
;
393385
; CHECK-GI-LABEL: combine_v8i16_8firstundef:
394386
; CHECK-GI: // %bb.0:
395-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
396-
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
397-
; CHECK-GI-NEXT: fmov d31, d1
398-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
399-
; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
387+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
388+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
389+
; CHECK-GI-NEXT: zip1.16b v0, v1, v0
400390
; CHECK-GI-NEXT: ret
401391
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
402392
ret <16 x i8> %3

llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec
88
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
99
; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
1010
; CHECK-SD-NEXT: mov v1.16b, v2.16b
11+
; CHECK-SD-NEXT: zip1 v2.4h, v0.4h, v2.4h
1112
; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
12-
; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
13-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
13+
; CHECK-SD-NEXT: fmov d0, d2
1414
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
1515
; CHECK-SD-NEXT: ret
1616
;

llvm/test/CodeGen/AArch64/insert-extend.ll

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -66,86 +66,86 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
6666
; CHECK-NEXT: ldr d5, [x11, x9]
6767
; CHECK-NEXT: shll2 v6.4s, v0.8h, #16
6868
; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b
69+
; CHECK-NEXT: shll2 v7.4s, v1.8h, #16
6970
; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b
70-
; CHECK-NEXT: shll2 v4.4s, v1.8h, #16
7171
; CHECK-NEXT: saddw v0.4s, v6.4s, v0.4h
72-
; CHECK-NEXT: shll2 v6.4s, v2.8h, #16
73-
; CHECK-NEXT: shll2 v5.4s, v3.8h, #16
74-
; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h
75-
; CHECK-NEXT: rev64 v4.4s, v0.4s
76-
; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h
77-
; CHECK-NEXT: saddw v3.4s, v5.4s, v3.4h
78-
; CHECK-NEXT: rev64 v5.4s, v1.4s
79-
; CHECK-NEXT: rev64 v6.4s, v2.4s
80-
; CHECK-NEXT: sub v4.4s, v0.4s, v4.4s
72+
; CHECK-NEXT: shll2 v5.4s, v2.8h, #16
73+
; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h
74+
; CHECK-NEXT: shll2 v4.4s, v3.8h, #16
75+
; CHECK-NEXT: rev64 v6.4s, v0.4s
76+
; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h
77+
; CHECK-NEXT: rev64 v7.4s, v1.4s
78+
; CHECK-NEXT: saddw v3.4s, v4.4s, v3.4h
79+
; CHECK-NEXT: rev64 v4.4s, v2.4s
80+
; CHECK-NEXT: sub v6.4s, v0.4s, v6.4s
8181
; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s
82-
; CHECK-NEXT: rev64 v7.4s, v3.4s
83-
; CHECK-NEXT: sub v5.4s, v1.4s, v5.4s
84-
; CHECK-NEXT: sub v6.4s, v2.4s, v6.4s
82+
; CHECK-NEXT: rev64 v5.4s, v3.4s
83+
; CHECK-NEXT: sub v7.4s, v1.4s, v7.4s
84+
; CHECK-NEXT: sub v4.4s, v2.4s, v4.4s
8585
; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
86-
; CHECK-NEXT: zip1 v16.4s, v5.4s, v4.4s
87-
; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
88-
; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s
89-
; CHECK-NEXT: mov v6.s[1], v7.s[0]
90-
; CHECK-NEXT: ext v7.16b, v5.16b, v16.16b, #8
91-
; CHECK-NEXT: mov v5.s[3], v4.s[2]
92-
; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8
93-
; CHECK-NEXT: mov v6.d[1], v7.d[1]
86+
; CHECK-NEXT: zip1 v16.4s, v7.4s, v6.4s
87+
; CHECK-NEXT: sub v5.4s, v3.4s, v5.4s
88+
; CHECK-NEXT: zip1 v3.4s, v4.4s, v5.4s
89+
; CHECK-NEXT: zip2 v4.4s, v4.4s, v5.4s
90+
; CHECK-NEXT: ext v5.16b, v7.16b, v16.16b, #8
91+
; CHECK-NEXT: mov v7.s[3], v6.s[2]
92+
; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8
9493
; CHECK-NEXT: mov v3.d[1], v5.d[1]
95-
; CHECK-NEXT: uzp1 v1.4s, v4.4s, v0.4s
96-
; CHECK-NEXT: uzp2 v4.4s, v4.4s, v0.4s
94+
; CHECK-NEXT: mov v4.d[1], v7.d[1]
95+
; CHECK-NEXT: uzp1 v1.4s, v6.4s, v0.4s
96+
; CHECK-NEXT: uzp2 v5.4s, v6.4s, v0.4s
9797
; CHECK-NEXT: addp v0.4s, v2.4s, v0.4s
98-
; CHECK-NEXT: add v5.4s, v3.4s, v6.4s
99-
; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s
98+
; CHECK-NEXT: add v6.4s, v4.4s, v3.4s
99+
; CHECK-NEXT: sub v3.4s, v3.4s, v4.4s
100100
; CHECK-NEXT: rev64 v7.4s, v0.4s
101-
; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
102-
; CHECK-NEXT: rev64 v4.4s, v5.4s
103-
; CHECK-NEXT: rev64 v6.4s, v3.4s
104-
; CHECK-NEXT: addp v16.4s, v0.4s, v5.4s
101+
; CHECK-NEXT: sub v1.4s, v1.4s, v5.4s
102+
; CHECK-NEXT: rev64 v4.4s, v6.4s
103+
; CHECK-NEXT: rev64 v5.4s, v3.4s
104+
; CHECK-NEXT: addp v16.4s, v0.4s, v6.4s
105105
; CHECK-NEXT: rev64 v2.4s, v1.4s
106106
; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
107107
; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s
108-
; CHECK-NEXT: sub v4.4s, v5.4s, v4.4s
109-
; CHECK-NEXT: addp v5.4s, v1.4s, v3.4s
110-
; CHECK-NEXT: sub v3.4s, v3.4s, v6.4s
108+
; CHECK-NEXT: sub v4.4s, v6.4s, v4.4s
109+
; CHECK-NEXT: addp v6.4s, v1.4s, v3.4s
110+
; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s
111111
; CHECK-NEXT: sub v1.4s, v1.4s, v2.4s
112112
; CHECK-NEXT: ext v7.16b, v0.16b, v16.16b, #4
113113
; CHECK-NEXT: ext v2.16b, v16.16b, v4.16b, #4
114-
; CHECK-NEXT: ext v6.16b, v5.16b, v3.16b, #4
114+
; CHECK-NEXT: ext v5.16b, v6.16b, v3.16b, #4
115115
; CHECK-NEXT: mov v19.16b, v4.16b
116-
; CHECK-NEXT: ext v17.16b, v1.16b, v5.16b, #8
116+
; CHECK-NEXT: ext v17.16b, v1.16b, v6.16b, #8
117117
; CHECK-NEXT: mov v20.16b, v3.16b
118118
; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s
119119
; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4
120120
; CHECK-NEXT: mov v19.s[2], v16.s[3]
121121
; CHECK-NEXT: zip2 v2.4s, v2.4s, v16.4s
122-
; CHECK-NEXT: zip2 v6.4s, v6.4s, v5.4s
123-
; CHECK-NEXT: mov v20.s[2], v5.s[3]
122+
; CHECK-NEXT: zip2 v5.4s, v5.4s, v6.4s
123+
; CHECK-NEXT: mov v20.s[2], v6.s[3]
124124
; CHECK-NEXT: ext v18.16b, v17.16b, v1.16b, #4
125-
; CHECK-NEXT: mov v1.s[2], v5.s[1]
125+
; CHECK-NEXT: mov v1.s[2], v6.s[1]
126126
; CHECK-NEXT: mov v21.16b, v7.16b
127127
; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s
128128
; CHECK-NEXT: ext v2.16b, v4.16b, v2.16b, #12
129-
; CHECK-NEXT: ext v3.16b, v3.16b, v6.16b, #12
129+
; CHECK-NEXT: ext v3.16b, v3.16b, v5.16b, #12
130130
; CHECK-NEXT: uzp2 v4.4s, v17.4s, v18.4s
131-
; CHECK-NEXT: mov v6.16b, v1.16b
131+
; CHECK-NEXT: mov v5.16b, v1.16b
132132
; CHECK-NEXT: mov v17.16b, v19.16b
133133
; CHECK-NEXT: mov v18.16b, v20.16b
134134
; CHECK-NEXT: mov v21.s[0], v16.s[1]
135-
; CHECK-NEXT: mov v6.s[1], v5.s[0]
135+
; CHECK-NEXT: mov v5.s[1], v6.s[0]
136136
; CHECK-NEXT: mov v17.s[1], v16.s[2]
137137
; CHECK-NEXT: sub v16.4s, v19.4s, v2.4s
138-
; CHECK-NEXT: mov v18.s[1], v5.s[2]
138+
; CHECK-NEXT: mov v18.s[1], v6.s[2]
139139
; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
140-
; CHECK-NEXT: sub v5.4s, v20.4s, v3.4s
140+
; CHECK-NEXT: sub v6.4s, v20.4s, v3.4s
141141
; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
142-
; CHECK-NEXT: add v4.4s, v6.4s, v4.4s
142+
; CHECK-NEXT: add v4.4s, v5.4s, v4.4s
143143
; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
144144
; CHECK-NEXT: add v3.4s, v18.4s, v3.4s
145145
; CHECK-NEXT: mov v0.d[1], v7.d[1]
146146
; CHECK-NEXT: mov v4.d[1], v1.d[1]
147147
; CHECK-NEXT: mov v2.d[1], v16.d[1]
148-
; CHECK-NEXT: mov v3.d[1], v5.d[1]
148+
; CHECK-NEXT: mov v3.d[1], v6.d[1]
149149
; CHECK-NEXT: cmlt v7.8h, v0.8h, #0
150150
; CHECK-NEXT: cmlt v1.8h, v4.8h, #0
151151
; CHECK-NEXT: cmlt v6.8h, v2.8h, #0

0 commit comments

Comments
 (0)