Skip to content

Commit bf4ec31

Browse files
committed
[LoongArch] Optimize for reversing vector using shufflevector
1 parent 673ea46 commit bf4ec31

File tree

3 files changed

+56
-15
lines changed

3 files changed

+56
-15
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,46 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
17011701
DAG.getConstant(Imm, DL, GRLenVT));
17021702
}
17031703

1704+
/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1705+
///
1706+
/// A single-source VECTOR_SHUFFLE that reverses the whole vector can be
1707+
/// optimized. Its mask looks like:
1708+
/// <7, 6, 5, 4, 3, 2, 1, 0>
1709+
///
1710+
/// When undefs appear in the mask they are treated as if they were whatever
1711+
/// value is necessary in order to fit the above form.
1712+
static SDValue
1713+
lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1714+
SDValue V1, SelectionDAG &DAG,
1715+
const LoongArchSubtarget &Subtarget) {
1716+
// Only vectors with i8/i16 elements, which cannot match other patterns
1717+
// directly, need this lowering.
1718+
if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1719+
VT != MVT::v16i16)
1720+
return SDValue();
1721+
1722+
int MaskSize = Mask.size();
1723+
for (int i = 0; i < MaskSize; ++i) {
1724+
if (Mask[i] != -1 && Mask[i] != MaskSize - 1 - i)
1725+
return SDValue();
1726+
}
1727+
1728+
int WidenNumElts = VT.getVectorNumElements() / 4; // One wide element per four.
1729+
SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1730+
for (int i = 0; i < WidenNumElts; ++i)
1731+
WidenMask[i] = WidenNumElts - 1 - i; // Reverse the wide elements.
1732+
1733+
MVT WidenVT = MVT::getVectorVT(
1734+
VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1735+
SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1736+
SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1737+
DAG.getUNDEF(WidenVT), WidenMask);
1738+
1739+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1740+
DAG.getBitcast(VT, WidenRev),
1741+
DAG.getConstant(27, DL, Subtarget.getGRLenVT())); // 27 == 0b00011011; presumably reverses each group of four -- confirm.
1742+
}
1743+
17041744
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
17051745
///
17061746
/// VPACKEV interleaves the even elements from each vector.
@@ -2004,6 +2044,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20042044
if ((Result =
20052045
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
20062046
return Result;
2047+
if ((Result =
2048+
lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2049+
return Result;
20072050

20082051
// TODO: This comment may be enabled in the future to better match the
20092052
// pattern for instruction selection.
@@ -2619,6 +2662,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
26192662
return Result;
26202663
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
26212664
return Result;
2665+
if ((Result =
2666+
lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2667+
return Result;
26222668

26232669
// TODO: This comment may be enabled in the future to better match the
26242670
// pattern for instruction selection.

llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@ define void @shufflevector_reverse_v32i8(ptr %res, ptr %a) nounwind {
66
; CHECK-LABEL: shufflevector_reverse_v32i8:
77
; CHECK: # %bb.0: # %entry
88
; CHECK-NEXT: xvld $xr0, $a1, 0
9-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
10-
; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
119
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
12-
; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
10+
; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27
11+
; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
1312
; CHECK-NEXT: xvst $xr0, $a0, 0
1413
; CHECK-NEXT: ret
1514
entry:
@@ -23,11 +22,9 @@ define void @shufflevector_reverse_v16i16(ptr %res, ptr %a) nounwind {
2322
; CHECK-LABEL: shufflevector_reverse_v16i16:
2423
; CHECK: # %bb.0: # %entry
2524
; CHECK-NEXT: xvld $xr0, $a1, 0
26-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
27-
; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
28-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
29-
; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
30-
; CHECK-NEXT: xvst $xr1, $a0, 0
25+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 27
26+
; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 27
27+
; CHECK-NEXT: xvst $xr0, $a0, 0
3128
; CHECK-NEXT: ret
3229
entry:
3330
%va = load <16 x i16>, ptr %a

llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ define void @shufflevector_reverse_v16i8(ptr %res, ptr %a) nounwind {
66
; CHECK-LABEL: shufflevector_reverse_v16i8:
77
; CHECK: # %bb.0: # %entry
88
; CHECK-NEXT: vld $vr0, $a1, 0
9-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
10-
; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI0_0)
11-
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
9+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27
10+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
1211
; CHECK-NEXT: vst $vr0, $a0, 0
1312
; CHECK-NEXT: ret
1413
entry:
@@ -22,10 +21,9 @@ define void @shufflevector_reverse_v8i16(ptr %res, ptr %a) nounwind {
2221
; CHECK-LABEL: shufflevector_reverse_v8i16:
2322
; CHECK: # %bb.0: # %entry
2423
; CHECK-NEXT: vld $vr0, $a1, 0
25-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
26-
; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI1_0)
27-
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
28-
; CHECK-NEXT: vst $vr1, $a0, 0
24+
; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
25+
; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 27
26+
; CHECK-NEXT: vst $vr0, $a0, 0
2927
; CHECK-NEXT: ret
3028
entry:
3129
%va = load <8 x i16>, ptr %a

0 commit comments

Comments
 (0)