Skip to content

Commit beed796

Browse files
authored
[LoongArch] Custom legalize vector_shuffle to xvpermi.d when possible (#160429)
1 parent 77a3d43 commit beed796

File tree

4 files changed

+44
-56
lines changed

4 files changed

+44
-56
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,7 @@ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
16031603
/// value is necessary in order to fit the above form.
16041604
static SDValue
16051605
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1606-
SDValue V1, SDValue V2, SelectionDAG &DAG,
1606+
SDValue V1, SelectionDAG &DAG,
16071607
const LoongArchSubtarget &Subtarget) {
16081608
int SplatIndex = -1;
16091609
for (const auto &M : Mask) {
@@ -1996,8 +1996,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
19961996
SDValue Result;
19971997
// TODO: Add more comparison patterns.
19981998
if (V2.isUndef()) {
1999-
if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
2000-
Subtarget)))
1999+
if ((Result =
2000+
lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
20012001
return Result;
20022002
if ((Result =
20032003
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
@@ -2053,7 +2053,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20532053
/// value is necessary in order to fit the above form.
20542054
static SDValue
20552055
lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2056-
SDValue V1, SDValue V2, SelectionDAG &DAG,
2056+
SDValue V1, SelectionDAG &DAG,
20572057
const LoongArchSubtarget &Subtarget) {
20582058
int SplatIndex = -1;
20592059
for (const auto &M : Mask) {
@@ -2096,10 +2096,29 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20962096
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
20972097
}
20982098

2099+
/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// Only 4-element masks on v4i64 / v4f64 are handled; for any other mask size
/// or value type an empty SDValue is returned, which the visible caller treats
/// as "no match" and moves on to the next lowering.
///
/// Mask entry i is packed into bits [2*i+1 : 2*i] of the immediate operand, so
/// for example the mask <3, 1, 2, 0> is encoded as 39. Entries equal to -1
/// (undef, by the usual shuffle-mask convention) are skipped and leave their
/// 2-bit field as 0.
2100+
static SDValue
2101+
lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2102+
SDValue V1, SelectionDAG &DAG,
2103+
const LoongArchSubtarget &Subtarget) {
2104+
// Only consider XVPERMI_D.
2105+
if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2106+
return SDValue();
2107+
2108+
// Accumulates the packed selector: four 2-bit fields, one per result element.
unsigned MaskImm = 0;
2109+
for (unsigned i = 0; i < Mask.size(); ++i) {
2110+
// A -1 entry contributes nothing, so its field stays 0.
if (Mask[i] == -1)
2111+
continue;
2112+
// NOTE(review): assumes every remaining entry is in [0, 3]; a larger index
// would spill into the neighbouring 2-bit fields. The visible call site only
// reaches this function when V2.isUndef() -- confirm the mask is restricted
// to V1 elements at that point.
MaskImm |= Mask[i] << (i * 2);
2113+
}
2114+
2115+
// Emit the XVPERMI node on V1 alone; the selector is passed as a constant of
// the subtarget's GRLen value type.
return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2116+
DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2117+
}
2118+
20992119
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
21002120
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2101-
MVT VT, SDValue V1, SDValue V2,
2102-
SelectionDAG &DAG,
2121+
MVT VT, SDValue V1, SelectionDAG &DAG,
21032122
const LoongArchSubtarget &Subtarget) {
21042123
// LoongArch LASX only has XVPERM_W.
21052124
if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
@@ -2540,14 +2559,16 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25402559
SDValue Result;
25412560
// TODO: Add more comparison patterns.
25422561
if (V2.isUndef()) {
2543-
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
2544-
Subtarget)))
2562+
if ((Result =
2563+
lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
25452564
return Result;
25462565
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
25472566
Subtarget)))
25482567
return Result;
25492568
if ((Result =
2550-
lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2569+
lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2570+
return Result;
2571+
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
25512572
return Result;
25522573

25532574
// TODO: This comment may be enabled in the future to better match the

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77
define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
88
; CHECK-LABEL: shufflevector_v4f64:
99
; CHECK: # %bb.0: # %entry
10-
; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3
11-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238
12-
; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1
13-
; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
10+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 3
11+
; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
12+
; CHECK-NEXT: vextrins.d $vr2, $vr3, 16
1413
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2
1514
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
16-
; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
15+
; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
1716
; CHECK-NEXT: ret
1817
entry:
1918
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>

llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll

Lines changed: 9 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
66
; CHECK-LABEL: shuffle_v32i8:
77
; CHECK: # %bb.0:
88
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
9-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
10-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
11-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
12-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
13-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
9+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
10+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
1411
; CHECK-NEXT: xvshuf.h $xr1, $xr2, $xr0
1512
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
1613
; CHECK-NEXT: ret
@@ -34,11 +31,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
3431
; CHECK-LABEL: shuffle_v16i16:
3532
; CHECK: # %bb.0:
3633
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
37-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
38-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
39-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
40-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
41-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
34+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
35+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
4236
; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
4337
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
4438
; CHECK-NEXT: ret
@@ -72,10 +66,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
7266
define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
7367
; CHECK-LABEL: shuffle_v8i32_same_lane:
7468
; CHECK: # %bb.0:
75-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
76-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
77-
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
78-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
69+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
7970
; CHECK-NEXT: ret
8071
%shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
8172
ret <8 x i32> %shuffle
@@ -84,14 +75,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
8475
define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
8576
; CHECK-LABEL: shuffle_v4i64:
8677
; CHECK: # %bb.0:
87-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
88-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
89-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1)
90-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
91-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
92-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
93-
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
94-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
78+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
9579
; CHECK-NEXT: ret
9680
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
9781
ret <4 x i64> %shuffle
@@ -100,10 +84,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
10084
define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
10185
; CHECK-LABEL: shuffle_v4i64_same_lane:
10286
; CHECK: # %bb.0:
103-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
104-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
105-
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
106-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
87+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
10788
; CHECK-NEXT: ret
10889
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
10990
ret <4 x i64> %shuffle
@@ -136,14 +117,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
136117
define <4 x double> @shuffle_v4f64(<4 x double> %a) {
137118
; CHECK-LABEL: shuffle_v4f64:
138119
; CHECK: # %bb.0:
139-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
140-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
141-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1)
142-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
143-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
144-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
145-
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
146-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
120+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
147121
; CHECK-NEXT: ret
148122
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
149123
ret <4 x double> %shuffle
@@ -152,11 +126,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
152126
define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
153127
; CHECK-LABEL: shuffle_v4f64_same_lane:
154128
; CHECK: # %bb.0:
155-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
156-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
157-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
158-
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
159-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
129+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 75
160130
; CHECK-NEXT: ret
161131
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
162132
ret <4 x double> %shuffle

llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
127127
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
128128
; CHECK-LABEL: byte_rotate_v4i64_3:
129129
; CHECK: # %bb.0:
130-
; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8
131-
; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
132-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
130+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177
133131
; CHECK-NEXT: ret
134132
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
135133
ret <4 x i64> %shuffle

0 commit comments

Comments
 (0)