@@ -1026,46 +1026,54 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
10261026 MVT VT, SDValue V1, SDValue V2,
10271027 SelectionDAG &DAG) {
10281028
// Purpose: try to lower a shuffle whose mask repeats one small pattern in
// every group of SubVecSize elements to a single LoongArchISD::VSHUF4I node.
// Returns an empty SDValue() when the mask does not fit that shape.
1029- // When the size is less than 4, lower cost instructions may be used.
1030- if (Mask.size () < 4 )
1031- return SDValue ();
// Group width: 2 for the four 64-bit-element types listed below, 4 otherwise.
1029+ unsigned SubVecSize = 4 ;
1030+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1031+ VT == MVT::v4i64) {
1032+ SubVecSize = 2 ;
1033+ }
10321034
// SubMask[i] holds the rebased index shared by position i of every group;
// -1 means "undef so far". Only the first SubVecSize entries are used.
10331035 int SubMask[4 ] = {-1 , -1 , -1 , -1 };
1034- for (unsigned i = 0 ; i < 4 ; ++i) {
1035- for (unsigned j = i; j < Mask.size (); j += 4 ) {
1036- int Idx = Mask[j];
1036+ for (unsigned i = 0 ; i < SubVecSize ; ++i) {
1037+ for (unsigned j = i; j < Mask.size (); j += SubVecSize ) {
1038+ int M = Mask[j];
10371039
10381040 // Rebase the vector index so it is relative to the current group.
10391041 // If the rebased index falls outside the range 0..3 then give up
1040- if (Idx != -1 ) {
1041- Idx -= 4 * (j / 4 );
1042- if (Idx < 0 || Idx >= 4 )
1042+ if (M != -1 ) {
// NOTE(review): the rebase stride stays 4 even when SubVecSize is 2, so
// for a 4-element mask the second pair (j = 2, 3) only passes with
// indices 4..7 - confirm this matches the intended xvshuf4i.d semantics.
1043+ M -= 4 * (j / SubVecSize );
1044+ if (M < 0 || M >= 4 )
10431045 return SDValue ();
10441046 }
10451047
10461048 // If the mask has an undef, replace it with the current index.
10471049 // Note that it might still be undef if the current index is also undef
10481050 if (SubMask[i] == -1 )
1049- SubMask[i] = Idx ;
1051+ SubMask[i] = M ;
10501052 // Check that non-undef values are the same as in the mask. If they
10511053 // aren't then give up
1052- else if (Idx != -1 && Idx != SubMask[i])
1054+ else if (M != -1 && M != SubMask[i])
10531055 return SDValue ();
10541056 }
10551057 }
10561058
10571059 // Calculate the immediate. Replace any remaining undefs with zero
// Each entry takes two bits; entry 0 ends up in the lowest bits because the
// loop walks from the highest used entry down and shifts before OR-ing.
10581060 APInt Imm (64 , 0 );
1059- for (int i = 3 ; i >= 0 ; --i) {
1060- int Idx = SubMask[i];
1061+ for (int i = SubVecSize - 1 ; i >= 0 ; --i) {
1062+ int M = SubMask[i];
10611063
1062- if (Idx == -1 )
1063- Idx = 0 ;
1064+ if (M == -1 )
1065+ M = 0 ;
10641066
10651067 Imm <<= 2 ;
1066- Imm |= Idx & 0x3 ;
1068+ Imm |= M & 0x3 ;
10671069 }
10681070
1071+ // 64-bit element types (vshuf4i.d and xvshuf4i.d): build the node with both V1 and V2
1072+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1073+ VT == MVT::v4i64)
1074+ return DAG.getNode (LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1075+ DAG.getConstant (Imm, DL, MVT::i64 ));
1076+
// All other types: build the node with V1 only; V2 is not passed.
10691077 return DAG.getNode (LoongArchISD::VSHUF4I, DL, VT, V1,
10701078 DAG.getConstant (Imm, DL, MVT::i64 ));
10711079}
@@ -1389,6 +1397,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
13891397 return Result;
13901398 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD (DL, Mask, VT, V1, V2, DAG)))
13911399 return Result;
1400+ if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1401+ (Result = lowerVECTOR_SHUFFLE_VSHUF4I (DL, Mask, VT, V1, V2, DAG)))
1402+ return Result;
13921403 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG,
13931404 Zeroable)))
13941405 return Result;
@@ -1447,10 +1458,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
14471458static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I (const SDLoc &DL, ArrayRef<int > Mask,
14481459 MVT VT, SDValue V1, SDValue V2,
14491460 SelectionDAG &DAG) {
// Thin wrapper: forwards every argument unchanged to
// lowerVECTOR_SHUFFLE_VSHUF4I and returns its result.
// The lines marked '-' below are the early-out on Mask.size() <= 4 that this
// hunk removes, which leaves the forwarding call unconditional.
1450- // When the size is less than or equal to 4, lower cost instructions may be
1451- // used.
1452- if (Mask.size () <= 4 )
1453- return SDValue ();
14541461 return lowerVECTOR_SHUFFLE_VSHUF4I (DL, Mask, VT, V1, V2, DAG);
14551462}
14561463
@@ -1832,6 +1839,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
18321839 return Result;
18331840 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, NewMask, VT, V1, V2, DAG)))
18341841 return Result;
1842+ if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
1843+ (Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, NewMask, VT, V1, V2, DAG)))
1844+ return Result;
18351845 if ((Result =
18361846 lowerVECTOR_SHUFFLEAsShift (DL, NewMask, VT, V1, V2, DAG, Zeroable)))
18371847 return Result;
0 commit comments