@@ -10096,7 +10096,10 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
1009610096 if (Size != (int)ExpectedMask.size())
1009710097 return false;
1009810098 assert(llvm::all_of(ExpectedMask,
10099- [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
10099+ [Size](int M) {
10100+ return M == SM_SentinelZero ||
10101+ isInRange(M, 0, 2 * Size);
10102+ }) &&
1010010103 "Illegal target shuffle mask");
1010110104
1010210105 // Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10122,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
1011910122 int ExpectedIdx = ExpectedMask[i];
1012010123 if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
1012110124 continue;
10125+ // If we failed to match an expected SM_SentinelZero then early out.
10126+ if (ExpectedIdx < 0)
10127+ return false;
1012210128 if (MaskIdx == SM_SentinelZero) {
1012310129 // If we need this expected index to be a zero element, then update the
1012410130 // relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39600,46 @@ static bool matchBinaryPermuteShuffle(
3959439600 ((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
3959539601 (MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
3959639602 (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
39603+ MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
39604+ MaskVT.getSizeInBits() / EltSizeInBits);
3959739605 if (!isAnyZero(Mask)) {
3959839606 int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
3959939607 if (0 < Rotation) {
3960039608 Shuffle = X86ISD::VALIGN;
39601- if (EltSizeInBits == 64)
39602- ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
39603- else
39604- ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
39609+ ShuffleVT = AlignVT;
3960539610 PermuteImm = Rotation;
3960639611 return true;
3960739612 }
3960839613 }
39614+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
39615+ unsigned ZeroLo = Zeroable.countr_one();
39616+ unsigned ZeroHi = Zeroable.countl_one();
39617+ assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
39618+ if (ZeroLo) {
39619+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39620+ std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
39621+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39622+ V1 = V1;
39623+ V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39624+ Shuffle = X86ISD::VALIGN;
39625+ ShuffleVT = AlignVT;
39626+ PermuteImm = NumMaskElts - ZeroLo;
39627+ return true;
39628+ }
39629+ }
39630+ if (ZeroHi) {
39631+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39632+ std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi,
39633+ ZeroHi);
39634+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39635+ V2 = V1;
39636+ V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39637+ Shuffle = X86ISD::VALIGN;
39638+ ShuffleVT = AlignVT;
39639+ PermuteImm = ZeroHi;
39640+ return true;
39641+ }
39642+ }
3960939643 }
3961039644
3961139645 // Attempt to match against PALIGNR byte rotate.
0 commit comments