Skip to content

Commit 67f85d3

Browse files
committed
[AArch64] recognise trn1/trn2 with flipped operands
1 parent 75e4438 commit 67f85d3

File tree

6 files changed

+224
-122
lines changed

6 files changed

+224
-122
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14815,9 +14815,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1481514815
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
1481614816
return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
1481714817
}
14818-
if (isTRNMask(ShuffleMask, NumElts, WhichResult)) {
14818+
if (isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
1481914819
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
14820-
return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
14820+
return DAG.getNode(Opc, DL, V1.getValueType(), OperandOrder == 0 ? V1 : V2,
14821+
OperandOrder == 0 ? V2 : V1);
1482114822
}
1482214823

1482314824
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
@@ -16529,7 +16530,7 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1652916530
isREVMask(M, EltSize, NumElts, 16) ||
1653016531
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
1653116532
isSingletonEXTMask(M, VT, DummyUnsigned) ||
16532-
isTRNMask(M, NumElts, DummyUnsigned) ||
16533+
isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
1653316534
isUZPMask(M, NumElts, DummyUnsigned) ||
1653416535
isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
1653516536
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
@@ -31588,10 +31589,13 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
3158831589
OperandOrder == 0 ? Op1 : Op2,
3158931590
OperandOrder == 0 ? Op2 : Op1));
3159031591

31591-
if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
31592+
if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult,
31593+
OperandOrder)) {
3159231594
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
3159331595
return convertFromScalableVector(
31594-
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
31596+
DAG, VT,
31597+
DAG.getNode(Opc, DL, ContainerVT, OperandOrder == 0 ? Op1 : Op2,
31598+
OperandOrder == 0 ? Op2 : Op1));
3159531599
}
3159631600

3159731601
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6699,33 +6699,52 @@ inline bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
66996699
}
67006700

67016701
/// Return true for trn1 or trn2 masks of the form:
6702-
/// <0, 8, 2, 10, 4, 12, 6, 14> or
6703-
/// <1, 9, 3, 11, 5, 13, 7, 15>
6702+
/// <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0, OperandOrderOut = 0) or
6703+
/// <1, 9, 3, 11, 5, 13, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or
6704+
/// <8, 0, 10, 2, 12, 4, 14, 6> (WhichResultOut = 0, OperandOrderOut = 1) or
6705+
/// <9, 1, 11, 3, 13, 5, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1)
67046706
inline bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
6705-
unsigned &WhichResultOut) {
6707+
unsigned &WhichResultOut, unsigned &OperandOrderOut) {
67066708
if (NumElts % 2 != 0)
67076709
return false;
6708-
// Check the first non-undef element for trn1 vs trn2.
6709-
unsigned WhichResult = 2;
6710+
6711+
// "Variant" refers to the distinction between trn1 and trn2, while
6712+
// "Order" refers to the sequence of input registers (matching vs flipped).
6713+
bool Variant0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0
6714+
bool Variant1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0
6715+
bool Variant0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1
6716+
bool Variant1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1
6717+
// Check all elements match.
67106718
for (unsigned i = 0; i != NumElts; i += 2) {
67116719
if (M[i] >= 0) {
6712-
WhichResult = ((unsigned)M[i] == i ? 0 : 1);
6713-
break;
6720+
unsigned EvenElt = (unsigned)M[i];
6721+
if (EvenElt != i)
6722+
Variant0Order0 = false;
6723+
if (EvenElt != i+1)
6724+
Variant1Order0 = false;
6725+
if (EvenElt != NumElts + i )
6726+
Variant0Order1 = false;
6727+
if (EvenElt != NumElts + i + 1)
6728+
Variant1Order1 = false;
67146729
}
67156730
if (M[i + 1] >= 0) {
6716-
WhichResult = ((unsigned)M[i + 1] == i + NumElts ? 0 : 1);
6717-
break;
6731+
unsigned OddElt = (unsigned)M[i + 1];
6732+
if (OddElt != NumElts + i)
6733+
Variant0Order0 = false;
6734+
if (OddElt != NumElts + i + 1)
6735+
Variant1Order0 = false;
6736+
if (OddElt != i)
6737+
Variant0Order1 = false;
6738+
if (OddElt != i + 1)
6739+
Variant1Order1 = false;
67186740
}
67196741
}
6720-
if (WhichResult == 2)
6742+
6743+
if (Variant0Order0 + Variant1Order0 + Variant0Order1 + Variant1Order1 != 1)
67216744
return false;
67226745

6723-
for (unsigned i = 0; i < NumElts; i += 2) {
6724-
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
6725-
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
6726-
return false;
6727-
}
6728-
WhichResultOut = WhichResult;
6746+
WhichResultOut = (Variant0Order0 || Variant0Order1) ? 0 : 1;
6747+
OperandOrderOut = (Variant0Order0 || Variant1Order0) ? 0 : 1;
67296748
return true;
67306749
}
67316750

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,14 +215,15 @@ bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
215215
ShuffleVectorPseudo &MatchInfo) {
216216
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217217
unsigned WhichResult;
218+
unsigned OperandOrder;
218219
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
219220
Register Dst = MI.getOperand(0).getReg();
220221
unsigned NumElts = MRI.getType(Dst).getNumElements();
221-
if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
222+
if (!isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
222223
return false;
223224
unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
224-
Register V1 = MI.getOperand(1).getReg();
225-
Register V2 = MI.getOperand(2).getReg();
225+
Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
226+
Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
226227
MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
227228
return true;
228229
}

llvm/test/CodeGen/AArch64/arm64-trn.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,87 @@ define <4 x float> @vtrnQf(ptr %A, ptr %B) nounwind {
246246
ret <4 x float> %tmp5
247247
}
248248

249+
define <8 x i8> @vtrni8_8first(ptr %A, ptr %B) nounwind {
250+
; CHECKLE-LABEL: vtrni8_8first:
251+
; CHECKLE: // %bb.0:
252+
; CHECKLE-NEXT: ldr d0, [x0]
253+
; CHECKLE-NEXT: ldr d1, [x1]
254+
; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
255+
; CHECKLE-NEXT: trn2 v0.8b, v0.8b, v1.8b
256+
; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
257+
; CHECKLE-NEXT: ret
258+
;
259+
; CHECKBE-LABEL: vtrni8_8first:
260+
; CHECKBE: // %bb.0:
261+
; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
262+
; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
263+
; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
264+
; CHECKBE-NEXT: trn2 v0.8b, v0.8b, v1.8b
265+
; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
266+
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
267+
; CHECKBE-NEXT: ret
268+
%tmp1 = load <8 x i8>, ptr %A
269+
%tmp2 = load <8 x i8>, ptr %B
270+
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
271+
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
272+
%tmp5 = add <8 x i8> %tmp3, %tmp4
273+
ret <8 x i8> %tmp5
274+
}
275+
276+
define <8 x i8> @vtrni8_9first(ptr %A, ptr %B) nounwind {
277+
; CHECKLE-LABEL: vtrni8_9first:
278+
; CHECKLE: // %bb.0:
279+
; CHECKLE-NEXT: ldr d0, [x0]
280+
; CHECKLE-NEXT: ldr d1, [x1]
281+
; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
282+
; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b
283+
; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
284+
; CHECKLE-NEXT: ret
285+
;
286+
; CHECKBE-LABEL: vtrni8_9first:
287+
; CHECKBE: // %bb.0:
288+
; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
289+
; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
290+
; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
291+
; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b
292+
; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
293+
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
294+
; CHECKBE-NEXT: ret
295+
%tmp1 = load <8 x i8>, ptr %A
296+
%tmp2 = load <8 x i8>, ptr %B
297+
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
298+
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7>
299+
%tmp5 = add <8 x i8> %tmp3, %tmp4
300+
ret <8 x i8> %tmp5
301+
}
302+
303+
define <8 x i8> @vtrni8_89first_undef(ptr %A, ptr %B) nounwind {
304+
; CHECKLE-LABEL: vtrni8_89first_undef:
305+
; CHECKLE: // %bb.0:
306+
; CHECKLE-NEXT: ldr d0, [x0]
307+
; CHECKLE-NEXT: ldr d1, [x1]
308+
; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
309+
; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b
310+
; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
311+
; CHECKLE-NEXT: ret
312+
;
313+
; CHECKBE-LABEL: vtrni8_89first_undef:
314+
; CHECKBE: // %bb.0:
315+
; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
316+
; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
317+
; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
318+
; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b
319+
; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
320+
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
321+
; CHECKBE-NEXT: ret
322+
%tmp1 = load <8 x i8>, ptr %A
323+
%tmp2 = load <8 x i8>, ptr %B
324+
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 8, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 14, i32 6>
325+
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 9, i32 1, i32 poison, i32 3, i32 13, i32 5, i32 15, i32 poison>
326+
%tmp5 = add <8 x i8> %tmp3, %tmp4
327+
ret <8 x i8> %tmp5
328+
}
329+
249330
; Undef shuffle indices (even at the start of the shuffle mask) should not prevent matching to VTRN:
250331

251332
define <8 x i8> @vtrni8_undef(ptr %A, ptr %B) nounwind {

llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec
66
; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
77
; CHECK-SD: // %bb.0:
88
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
9-
; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
10-
; CHECK-SD-NEXT: mov v1.16b, v2.16b
11-
; CHECK-SD-NEXT: zip1 v2.4h, v0.4h, v2.4h
12-
; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
9+
; CHECK-SD-NEXT: dup v1.2s, v0.s[1]
10+
; CHECK-SD-NEXT: zip1 v2.4h, v0.4h, v1.4h
11+
; CHECK-SD-NEXT: trn2 v1.4h, v0.4h, v1.4h
1312
; CHECK-SD-NEXT: fmov d0, d2
14-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
1513
; CHECK-SD-NEXT: ret
1614
;
1715
; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:

0 commit comments

Comments
 (0)