@@ -1990,6 +1990,48 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1990
1990
return lowerVECTOR_SHUFFLE_VSHUF4I (DL, Mask, VT, V1, V2, DAG, Subtarget);
1991
1991
}
1992
1992
1993
+ // / Lower VECTOR_SHUFFLE into XVPERM (if possible).
1994
+ static SDValue lowerVECTOR_SHUFFLE_XVPERM (const SDLoc &DL, ArrayRef<int > Mask,
1995
+ MVT VT, SDValue V1, SDValue V2,
1996
+ SelectionDAG &DAG) {
1997
+ // LoongArch LASX only have XVPERM_W.
1998
+ if (Mask.size () != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
1999
+ return SDValue ();
2000
+
2001
+ unsigned NumElts = VT.getVectorNumElements ();
2002
+ unsigned HalfSize = NumElts / 2 ;
2003
+ bool FrontLo = true , FrontHi = true ;
2004
+ bool BackLo = true , BackHi = true ;
2005
+
2006
+ auto inRange = [](int val, int low, int high) {
2007
+ return (val == -1 ) || (val >= low && val < high);
2008
+ };
2009
+
2010
+ for (unsigned i = 0 ; i < HalfSize; ++i) {
2011
+ int Fronti = Mask[i];
2012
+ int Backi = Mask[i + HalfSize];
2013
+
2014
+ FrontLo &= inRange (Fronti, 0 , HalfSize);
2015
+ FrontHi &= inRange (Fronti, HalfSize, NumElts);
2016
+ BackLo &= inRange (Backi, 0 , HalfSize);
2017
+ BackHi &= inRange (Backi, HalfSize, NumElts);
2018
+ }
2019
+
2020
+ // If both the lower and upper 128-bit parts access only one half of the
2021
+ // vector (either lower or upper), avoid using xvperm.w. The latency of
2022
+ // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2023
+ if ((FrontLo || FrontHi) && (BackLo || BackHi))
2024
+ return SDValue ();
2025
+
2026
+ SmallVector<SDValue, 8 > Masks;
2027
+ for (unsigned i = 0 ; i < NumElts; ++i)
2028
+ Masks.push_back (Mask[i] == -1 ? DAG.getUNDEF (MVT::i64 )
2029
+ : DAG.getConstant (Mask[i], DL, MVT::i64 ));
2030
+ SDValue MaskVec = DAG.getBuildVector (MVT::v8i32, DL, Masks);
2031
+
2032
+ return DAG.getNode (LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2033
+ }
2034
+
1993
2035
// / Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
1994
2036
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV (const SDLoc &DL, ArrayRef<int > Mask,
1995
2037
MVT VT, SDValue V1, SDValue V2,
@@ -2396,6 +2438,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2396
2438
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, NewMask, VT, V1, V2, DAG,
2397
2439
Subtarget)))
2398
2440
return Result;
2441
+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM (DL, NewMask, VT, V1, V2, DAG)))
2442
+ return Result;
2399
2443
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle (DL, NewMask, VT,
2400
2444
V1, V2, DAG)))
2401
2445
return Result;
0 commit comments