Skip to content

Commit d83e246

Browse files
zhaoqi5dvbuka
authored and committed
[LoongArch] Optimize for reversing vector using shufflevector (llvm#163151)
1 parent 5a1f02d commit d83e246

File tree

3 files changed

+53
-15
lines changed

3 files changed

+53
-15
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,43 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
17011701
DAG.getConstant(Imm, DL, GRLenVT));
17021702
}
17031703

1704+
/// Lower a VECTOR_SHUFFLE whose result is the reversed source vector.
1705+
///
1706+
/// A VECTOR_SHUFFLE that performs a full vector reverse can be optimized; its
1707+
/// mask looks like:
1708+
/// <7, 6, 5, 4, 3, 2, 1, 0>
1709+
///
1710+
/// When undefs appear in the mask they are treated as if they were whatever
1711+
/// value is necessary in order to fit the above form.
///
/// The lowering is done in two steps: V1 is first reversed in chunks of four
/// elements (by viewing each chunk as one wider element and shuffling those),
/// and a VSHUF4I node with immediate 27 is then applied to the result.
///
/// Returns an empty SDValue when VT is not one of v16i8, v8i16, v32i8 or
/// v16i16, or when Mask is rejected by ShuffleVectorInst::isReverseMask.
///
/// NOTE(review): only V1 is consulted here; presumably a reverse that reads
/// the second shuffle operand never reaches this function (or has already
/// been canonicalized onto V1) -- confirm against the callers.
1712+
static SDValue
1713+
lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1714+
SDValue V1, SelectionDAG &DAG,
1715+
const LoongArchSubtarget &Subtarget) {
1716+
// Only vectors with i8/i16 elements, which cannot match other patterns
1717+
// directly, need to do this.
1718+
if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1719+
VT != MVT::v16i16)
1720+
return SDValue();
1721+
1722+
// Bail out unless the whole mask describes a reverse of the source.
if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1723+
return SDValue();
1724+
1725+
// Step 1: build a reverse mask over chunks of four original elements, so the
// widened vector has a quarter as many elements as VT.
int WidenNumElts = VT.getVectorNumElements() / 4;
1726+
SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1727+
for (int i = 0; i < WidenNumElts; ++i)
1728+
WidenMask[i] = WidenNumElts - 1 - i;
1729+
1730+
// Four i8 elements are viewed as one i32; otherwise (i16 elements) four of
// them are viewed as one i64.
MVT WidenVT = MVT::getVectorVT(
1731+
VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1732+
SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1733+
// Reverse the order of the chunks; the second shuffle operand is unused.
SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1734+
DAG.getUNDEF(WidenVT), WidenMask);
1735+
1736+
// Step 2: apply VSHUF4I in the original element type. The immediate 27 is
// 0b00011011; presumably this encodes the selectors <3, 2, 1, 0> and thus
// reverses the four elements inside every chunk -- confirm against the
// VSHUF4I immediate encoding in the ISA manual.
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1737+
DAG.getBitcast(VT, WidenRev),
1738+
DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1739+
}
1740+
17041741
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
17051742
///
17061743
/// VPACKEV interleaves the even elements from each vector.
@@ -2004,6 +2041,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20042041
if ((Result =
20052042
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
20062043
return Result;
2044+
if ((Result =
2045+
lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2046+
return Result;
20072047

20082048
// TODO: This comment may be enabled in the future to better match the
20092049
// pattern for instruction selection.
@@ -2622,6 +2662,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
26222662
return Result;
26232663
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
26242664
return Result;
2665+
if ((Result =
2666+
lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2667+
return Result;
26252668

26262669
// TODO: This comment may be enabled in the future to better match the
26272670
// pattern for instruction selection.

llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@ define void @shufflevector_reverse_v32i8(ptr %res, ptr %a) nounwind {
66
; CHECK-LABEL: shufflevector_reverse_v32i8:
77
; CHECK: # %bb.0: # %entry
88
; CHECK-NEXT: xvld $xr0, $a1, 0
9-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
10-
; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
119
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
12-
; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
10+
; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27
11+
; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
1312
; CHECK-NEXT: xvst $xr0, $a0, 0
1413
; CHECK-NEXT: ret
1514
entry:
@@ -23,11 +22,9 @@ define void @shufflevector_reverse_v16i16(ptr %res, ptr %a) nounwind {
2322
; CHECK-LABEL: shufflevector_reverse_v16i16:
2423
; CHECK: # %bb.0: # %entry
2524
; CHECK-NEXT: xvld $xr0, $a1, 0
26-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
27-
; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
28-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
29-
; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
30-
; CHECK-NEXT: xvst $xr1, $a0, 0
25+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 27
26+
; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 27
27+
; CHECK-NEXT: xvst $xr0, $a0, 0
3128
; CHECK-NEXT: ret
3229
entry:
3330
%va = load <16 x i16>, ptr %a

llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ define void @shufflevector_reverse_v16i8(ptr %res, ptr %a) nounwind {
66
; CHECK-LABEL: shufflevector_reverse_v16i8:
77
; CHECK: # %bb.0: # %entry
88
; CHECK-NEXT: vld $vr0, $a1, 0
9-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
10-
; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI0_0)
11-
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
9+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27
10+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
1211
; CHECK-NEXT: vst $vr0, $a0, 0
1312
; CHECK-NEXT: ret
1413
entry:
@@ -22,10 +21,9 @@ define void @shufflevector_reverse_v8i16(ptr %res, ptr %a) nounwind {
2221
; CHECK-LABEL: shufflevector_reverse_v8i16:
2322
; CHECK: # %bb.0: # %entry
2423
; CHECK-NEXT: vld $vr0, $a1, 0
25-
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
26-
; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI1_0)
27-
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
28-
; CHECK-NEXT: vst $vr1, $a0, 0
24+
; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
25+
; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 27
26+
; CHECK-NEXT: vst $vr0, $a0, 0
2927
; CHECK-NEXT: ret
3028
entry:
3129
%va = load <8 x i16>, ptr %a

0 commit comments

Comments
 (0)