Skip to content

Commit d7a3ab2

Browse files
authored
[LoongArch] Use xvperm.w for cross-lane access within a single vector (#151634)
1 parent c34cdd7 commit d7a3ab2

File tree

2 files changed

+48
-14
lines changed

2 files changed

+48
-14
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,48 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
19901990
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
19911991
}
19921992

1993+
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
1994+
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
1995+
MVT VT, SDValue V1, SDValue V2,
1996+
SelectionDAG &DAG) {
1997+
// LoongArch LASX only have XVPERM_W.
1998+
if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
1999+
return SDValue();
2000+
2001+
unsigned NumElts = VT.getVectorNumElements();
2002+
unsigned HalfSize = NumElts / 2;
2003+
bool FrontLo = true, FrontHi = true;
2004+
bool BackLo = true, BackHi = true;
2005+
2006+
auto inRange = [](int val, int low, int high) {
2007+
return (val == -1) || (val >= low && val < high);
2008+
};
2009+
2010+
for (unsigned i = 0; i < HalfSize; ++i) {
2011+
int Fronti = Mask[i];
2012+
int Backi = Mask[i + HalfSize];
2013+
2014+
FrontLo &= inRange(Fronti, 0, HalfSize);
2015+
FrontHi &= inRange(Fronti, HalfSize, NumElts);
2016+
BackLo &= inRange(Backi, 0, HalfSize);
2017+
BackHi &= inRange(Backi, HalfSize, NumElts);
2018+
}
2019+
2020+
// If both the lower and upper 128-bit parts access only one half of the
2021+
// vector (either lower or upper), avoid using xvperm.w. The latency of
2022+
// xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2023+
if ((FrontLo || FrontHi) && (BackLo || BackHi))
2024+
return SDValue();
2025+
2026+
SmallVector<SDValue, 8> Masks;
2027+
for (unsigned i = 0; i < NumElts; ++i)
2028+
Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2029+
: DAG.getConstant(Mask[i], DL, MVT::i64));
2030+
SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2031+
2032+
return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2033+
}
2034+
19932035
/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
19942036
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
19952037
MVT VT, SDValue V1, SDValue V2,
@@ -2396,6 +2438,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
23962438
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
23972439
Subtarget)))
23982440
return Result;
2441+
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2442+
return Result;
23992443
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
24002444
V1, V2, DAG)))
24012445
return Result;

llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
6161
; CHECK-LABEL: shuffle_v8i32:
6262
; CHECK: # %bb.0:
6363
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
64-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
65-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1)
66-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1)
67-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
68-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
69-
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
70-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
64+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_0)
65+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
7166
; CHECK-NEXT: ret
7267
%shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
7368
ret <8 x i32> %shuffle
@@ -117,13 +112,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) {
117112
; CHECK-LABEL: shuffle_v8f32:
118113
; CHECK: # %bb.0:
119114
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
120-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI8_0)
121-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1)
122-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI8_1)
123-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
124-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
125-
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
126-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
115+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI8_0)
116+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
127117
; CHECK-NEXT: ret
128118
%shuffle = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
129119
ret <8 x float> %shuffle

0 commit comments

Comments
 (0)