Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14815,9 +14815,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, NumElts, WhichResult)) {
if (isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
return DAG.getNode(Opc, DL, V1.getValueType(), OperandOrder == 0 ? V1 : V2,
OperandOrder == 0 ? V2 : V1);
}

if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
Expand Down Expand Up @@ -16529,7 +16530,7 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
isREVMask(M, EltSize, NumElts, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
isSingletonEXTMask(M, VT, DummyUnsigned) ||
isTRNMask(M, NumElts, DummyUnsigned) ||
isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
isUZPMask(M, NumElts, DummyUnsigned) ||
isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
Expand Down Expand Up @@ -31588,10 +31589,13 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
OperandOrder == 0 ? Op1 : Op2,
OperandOrder == 0 ? Op2 : Op1));

if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult,
OperandOrder)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
SDValue TRN =
DAG.getNode(Opc, DL, ContainerVT, OperandOrder == 0 ? Op1 : Op2,
OperandOrder == 0 ? Op2 : Op1);
return convertFromScalableVector(DAG, VT, TRN);
}

if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
Expand Down
52 changes: 36 additions & 16 deletions llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
Original file line number Diff line number Diff line change
Expand Up @@ -6699,33 +6699,53 @@ inline bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
}

/// Return true for trn1 or trn2 masks of the form:
///  <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0, OperandOrderOut = 0) or
///  <1, 9, 3, 11, 5, 13, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or
///  <8, 0, 10, 2, 12, 4, 14, 6> (WhichResultOut = 0, OperandOrderOut = 1) or
///  <9, 1, 11, 3, 13, 5, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1).
///
/// Negative mask elements are treated as "don't care" and never rule out a
/// form. A mask is accepted only when exactly one of the four forms remains
/// possible, so a mask made up entirely of negative elements is rejected.
///
/// \param M               Shuffle mask; elements [0, NumElts) are read.
/// \param NumElts         Number of mask elements to inspect. Odd values are
///                        rejected immediately.
/// \param WhichResultOut  On success, 0 for the trn1 form or 1 for the trn2
///                        form.
/// \param OperandOrderOut On success, 0 when the mask uses the two sources in
///                        their given order, 1 when they are swapped.
/// \returns true if the mask matches exactly one form. Both output parameters
///          are left untouched when false is returned.
inline bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResultOut, unsigned &OperandOrderOut) {
  if (NumElts % 2 != 0)
    return false;

  // "Result" corresponds to "WhichResultOut", selecting between trn1 and trn2.
  // "Order" corresponds to "OperandOrderOut", selecting the order of operands
  // for the instruction (flipped or not).
  bool Result0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0
  bool Result1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0
  bool Result0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1
  bool Result1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1

  // Check all elements match. Every defined element is compatible with at
  // most one of the four forms, so each one eliminates the other candidates.
  for (unsigned i = 0; i != NumElts; i += 2) {
    if (M[i] >= 0) {
      unsigned EvenElt = (unsigned)M[i];
      if (EvenElt != i)
        Result0Order0 = false;
      if (EvenElt != i + 1)
        Result1Order0 = false;
      if (EvenElt != NumElts + i)
        Result0Order1 = false;
      if (EvenElt != NumElts + i + 1)
        Result1Order1 = false;
    }
    if (M[i + 1] >= 0) {
      unsigned OddElt = (unsigned)M[i + 1];
      if (OddElt != NumElts + i)
        Result0Order0 = false;
      if (OddElt != NumElts + i + 1)
        Result1Order0 = false;
      if (OddElt != i)
        Result0Order1 = false;
      if (OddElt != i + 1)
        Result1Order1 = false;
    }
  }

  // Exactly one surviving candidate is required: zero means no form fits,
  // and more than one means no defined element constrained the choice.
  if (Result0Order0 + Result1Order0 + Result0Order1 + Result1Order1 != 1)
    return false;

  WhichResultOut = (Result0Order0 || Result0Order1) ? 0 : 1;
  OperandOrderOut = (Result0Order0 || Result1Order0) ? 0 : 1;
  return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,15 @@ bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
unsigned WhichResult;
unsigned OperandOrder;
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
Register Dst = MI.getOperand(0).getReg();
unsigned NumElts = MRI.getType(Dst).getNumElements();
if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
if (!isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
return false;
unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
Register V1 = MI.getOperand(1).getReg();
Register V2 = MI.getOperand(2).getReg();
Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
return true;
}
Expand Down
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-trn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,63 @@ define <4 x float> @vtrnQf(ptr %A, ptr %B) nounwind {
ret <4 x float> %tmp5
}

; The shuffle mask below is the trn1 pattern with the two sources swapped:
; even result lanes take lanes 0, 2, 4, 6 of %B (mask indices 8, 10, 12, 14)
; and odd result lanes take lanes 0, 2, 4, 6 of %A. The expected output is a
; single trn1 whose operands are reversed (v1 before v0). The BE check prefix
; (presumably the big-endian run; the RUN lines are not visible here) also
; expects rev64 on both inputs and on the result.
define <8 x i8> @vtrni8_trn1_flipped(<8 x i8> %A, <8 x i8> %B) nounwind {
; CHECKLE-LABEL: vtrni8_trn1_flipped:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: trn1 v0.8b, v1.8b, v0.8b
; CHECKLE-NEXT: ret
;
; CHECKBE-LABEL: vtrni8_trn1_flipped:
; CHECKBE: // %bb.0:
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
; CHECKBE-NEXT: rev64 v1.8b, v1.8b
; CHECKBE-NEXT: trn1 v0.8b, v1.8b, v0.8b
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
; CHECKBE-NEXT: ret
%tmp1 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
ret <8 x i8> %tmp1
}

; The shuffle mask below is the trn2 pattern with the two sources swapped:
; even result lanes take lanes 1, 3, 5, 7 of %B (mask indices 9, 11, 13, 15)
; and odd result lanes take lanes 1, 3, 5, 7 of %A. The expected output is a
; single trn2 whose operands are reversed (v1 before v0). The BE check prefix
; (presumably the big-endian run; the RUN lines are not visible here) also
; expects rev64 on both inputs and on the result.
define <8 x i8> @vtrni8_trn2_flipped(<8 x i8> %A, <8 x i8> %B) nounwind {
; CHECKLE-LABEL: vtrni8_trn2_flipped:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b
; CHECKLE-NEXT: ret
;
; CHECKBE-LABEL: vtrni8_trn2_flipped:
; CHECKBE: // %bb.0:
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
; CHECKBE-NEXT: rev64 v1.8b, v1.8b
; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
; CHECKBE-NEXT: ret
%tmp1 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7>
ret <8 x i8> %tmp1
}

; Two shuffles whose masks contain poison lanes, including in lane 0.
; The defined lanes of %tmp1 fit only the swapped-operand trn1 pattern, and
; those of %tmp2 fit only the swapped-operand trn2 pattern. The expected
; output is one trn1 and one trn2, each with operands reversed (v1 before
; v0), followed by the add that combines the two results.
define <8 x i8> @vtrni8_both_flipped_with_poison_values(<8 x i8> %A, <8 x i8> %B) nounwind {
; CHECKLE-LABEL: vtrni8_both_flipped_with_poison_values:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b
; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
; CHECKLE-NEXT: ret
;
; CHECKBE-LABEL: vtrni8_both_flipped_with_poison_values:
; CHECKBE: // %bb.0:
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
; CHECKBE-NEXT: rev64 v1.8b, v1.8b
; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b
; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
; CHECKBE-NEXT: ret
%tmp1 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 14, i32 6>
%tmp2 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 poison, i32 1, i32 poison, i32 3, i32 13, i32 5, i32 15, i32 poison>
%tmp3 = add <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}

; Undef shuffle indices (even at the start of the shuffle mask) should not prevent matching to VTRN:

define <8 x i8> @vtrni8_undef(ptr %A, ptr %B) nounwind {
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec
; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: zip1 v2.4h, v0.4h, v2.4h
; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
; CHECK-SD-NEXT: dup v1.2s, v0.s[1]
; CHECK-SD-NEXT: zip1 v2.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: trn2 v1.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
Expand Down
Loading