@@ -2068,7 +2068,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   const auto &Begin = Mask.begin();
   const auto &End = Mask.end();
-  unsigned HalfSize = Mask.size() / 2;
+  int HalfSize = Mask.size() / 2;
+
+  if (SplatIndex >= HalfSize)
+    return SDValue();
 
   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
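
The early return added here rejects splat indices in the upper half of the mask before the pattern check runs; xvrepl128vei broadcasts one element from within each 128-bit lane, so only indices below HalfSize are meaningful for this lowering. Below is a minimal standalone sketch of the guarded pattern (plain C++, hypothetical helper name, covering only the single-operand case shown in the hunk, not the LLVM API itself):

#include <cstdio>
#include <vector>

// Sketch of the whole-vector splat pattern being matched: every element of
// the low half equals SplatIndex and every element of the high half equals
// SplatIndex + HalfSize, with -1 treated as "don't care".
static bool isPerLaneSplat(const std::vector<int> &Mask, int SplatIndex) {
  int HalfSize = (int)Mask.size() / 2;
  if (SplatIndex >= HalfSize) // mirrors the early return added above
    return false;
  for (int i = 0; i < HalfSize; ++i)
    if (Mask[i] >= 0 && Mask[i] != SplatIndex)
      return false;
  for (int i = HalfSize; i < (int)Mask.size(); ++i)
    if (Mask[i] >= 0 && Mask[i] != SplatIndex + HalfSize)
      return false;
  return true;
}

int main() {
  std::printf("%d\n", isPerLaneSplat({1, 1, 1, 1, 5, 5, 5, 5}, 1)); // 1: in-lane splat of element 1
  std::printf("%d\n", isPerLaneSplat({5, 5, 5, 5, 5, 5, 5, 5}, 5)); // 0: splat index is in the upper half
  return 0;
}
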
@@ -2363,8 +2366,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
 /// The first case is the closest to LoongArch instructions and the other
 /// cases need to be converted to it for processing.
 ///
-/// This function may modify V1, V2 and Mask
-static void canonicalizeShuffleVectorByLane(
+/// This function will return true for the last three cases above and will
+/// modify V1, V2 and Mask. Otherwise, return false for the first case and
+/// cross-lane shuffle cases.
+static bool canonicalizeShuffleVectorByLane(
     const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
     SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
 
@@ -2388,15 +2393,15 @@ static void canonicalizeShuffleVectorByLane(
     preMask = LowLaneTy;
 
   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-        return M < 0 || (M >= 0 && M < HalfSize) ||
-               (M >= MaskSize && M < MaskSize + HalfSize);
+        return M < 0 || (M >= HalfSize && M < MaskSize) ||
+               (M >= MaskSize + HalfSize && M < MaskSize * 2);
       }))
-    postMask = HighLaneTy;
+    postMask = LowLaneTy;
   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-             return M < 0 || (M >= HalfSize && M < MaskSize) ||
-                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+             return M < 0 || (M >= 0 && M < HalfSize) ||
+                    (M >= MaskSize && M < MaskSize + HalfSize);
           }))
-    postMask = LowLaneTy;
+    postMask = HighLaneTy;
 
   // The pre-half of mask is high lane type, and the post-half of mask
   // is low lane type, which is closest to the LoongArch instructions.
@@ -2405,7 +2410,7 @@ static void canonicalizeShuffleVectorByLane(
   // to the lower 128-bit of vector register, and the low lane of mask
   // corresponds the higher 128-bit of vector register.
   if (preMask == HighLaneTy && postMask == LowLaneTy) {
-    return;
+    return false;
   }
   if (preMask == LowLaneTy && postMask == HighLaneTy) {
     V1 = DAG.getBitcast(MVT::v4i64, V1);
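
The two lambdas above now classify the post-half of the mask correctly: elements referring to the high 128-bit lane of V1 or V2 make it LowLaneTy, and elements referring to the low lane make it HighLaneTy (the previous code had the two outcomes swapped). A standalone sketch of the fixed predicate follows, in plain C++ with v8i32 sizes (MaskSize == 8, HalfSize == 4); the helper name and the OriginTy enumerator are placeholders, not the backend's own names:

#include <algorithm>
#include <cassert>
#include <vector>

enum LaneType { OriginTy, LowLaneTy, HighLaneTy }; // OriginTy is a placeholder

// Classify the post-half of a 256-bit shuffle mask the way the fixed code
// does: references to the high 128-bit lane of V1/V2 yield LowLaneTy,
// references to the low lane yield HighLaneTy.
static LaneType classifyPostHalf(const std::vector<int> &Mask) {
  int MaskSize = (int)Mask.size();
  int HalfSize = MaskSize / 2;
  auto Post = Mask.begin() + HalfSize;
  if (std::all_of(Post, Mask.end(), [&](int M) {
        return M < 0 || (M >= HalfSize && M < MaskSize) ||
               (M >= MaskSize + HalfSize && M < MaskSize * 2);
      }))
    return LowLaneTy;
  if (std::all_of(Post, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    return HighLaneTy;
  return OriginTy; // mixed references; the original code treats this as cross-lane
}

int main() {
  assert(classifyPostHalf({0, 1, 2, 3, 4, 5, 6, 7}) == LowLaneTy);  // post-half uses the high lane of V1
  assert(classifyPostHalf({4, 5, 6, 7, 0, 1, 2, 3}) == HighLaneTy); // post-half uses the low lane of V1
  assert(classifyPostHalf({0, 1, 2, 3, 0, 1, 6, 7}) == OriginTy);   // mixed
  return 0;
}
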
@@ -2459,8 +2464,10 @@ static void canonicalizeShuffleVectorByLane(
       *it = *it < 0 ? *it : *it + HalfSize;
     }
   } else { // cross-lane
-    return;
+    return false;
   }
+
+  return true;
 }
 
 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2526,28 +2533,21 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
   assert(Mask.size() >= 4 && "Mask size is less than 4.");
 
-  // canonicalize non cross-lane shuffle vector
-  SmallVector<int> NewMask(Mask);
-  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
-
   APInt KnownUndef, KnownZero;
-  computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
+  computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
   APInt Zeroable = KnownUndef | KnownZero;
 
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG,
-                                             Subtarget)))
-      return Result;
-    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
-                                                             V1, V2, DAG)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the
@@ -2557,24 +2557,39 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   // It is recommended not to change the pattern comparison order for better
   // performance.
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
-                                           Subtarget, Zeroable)))
+  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
+                                           Zeroable)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
+  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
    return Result;
+
+  // canonicalize non cross-lane shuffle vector
+  SmallVector<int> NewMask(Mask);
+  if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
+    return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
+
+  // FIXME: Handling the remaining cases earlier can degrade performance
+  // in some situations. Further analysis is required to enable more
+  // effective optimizations.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
+                                                             V1, V2, DAG)))
+      return Result;
+  }
+
   if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
     return NewShuffle;
   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
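
Taken together, this hunk turns lower256BitShuffle into a two-pass driver: the inexpensive matchers run against the original Mask first, and lane canonicalization plus a single recursive retry only happen when they all fail, in line with the FIXME about handling the remaining cases too early. Below is a simplified, self-contained sketch of that control flow (hypothetical names and toy matchers, not the LLVM API); the recursion appears to be bounded because the canonicalizer reports false once the mask is already in canonical shape, mirroring the new bool return of canonicalizeShuffleVectorByLane:

#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

using ShuffleMask = std::vector<int>;

// Toy stand-in for the cheap lowerVECTOR_SHUFFLE_XV* matchers: "succeeds"
// only for the identity mask, which is enough to show the control flow.
static std::optional<const char *> tryCheapPatterns(const ShuffleMask &M) {
  for (int i = 0, e = (int)M.size(); i != e; ++i)
    if (M[i] != i)
      return std::nullopt;
  return "cheap pattern";
}

// Toy stand-in for canonicalizeShuffleVectorByLane: if the pre-half of the
// mask points at the high lane, swap the two halves and report true; report
// false when the mask already looks canonical, which is what bounds the
// recursion below to a single retry.
static bool canonicalizeByLane(ShuffleMask &M) {
  int Half = (int)M.size() / 2;
  if (M[0] < Half)
    return false; // already canonical (or cross-lane in the real code)
  for (int i = 0; i < Half; ++i)
    std::swap(M[i], M[i + Half]);
  return true;
}

static const char *lower256BitShuffleSketch(const ShuffleMask &Mask) {
  if (auto R = tryCheapPatterns(Mask)) // pass 1: original mask
    return *R;
  ShuffleMask NewMask(Mask);
  if (canonicalizeByLane(NewMask)) // pass 2: canonicalized mask, retried once
    return lower256BitShuffleSketch(NewMask);
  return "generic fallback"; // lane permute / widenShuffleMask / xvshuf path
}

int main() {
  std::printf("%s\n", lower256BitShuffleSketch({0, 1, 2, 3, 4, 5, 6, 7})); // cheap pattern
  std::printf("%s\n", lower256BitShuffleSketch({4, 5, 6, 7, 0, 1, 2, 3})); // cheap pattern, after canonicalization
  std::printf("%s\n", lower256BitShuffleSketch({7, 6, 5, 4, 3, 2, 1, 0})); // generic fallback
  return 0;
}
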