@@ -10096,7 +10096,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
1009610096 if (Size != (int)ExpectedMask.size())
1009710097 return false;
1009810098 assert(llvm::all_of(ExpectedMask,
10099- [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
10099+ [Size](int M) {
10100+ return M == SM_SentinelZero || isInRange(M, 0, 2 * Size);
10101+ }) &&
1010010102 "Illegal target shuffle mask");
1010110103
1010210104 // Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10121,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
1011910121 int ExpectedIdx = ExpectedMask[i];
1012010122 if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
1012110123 continue;
10124+ // If we failed to match an expected SM_SentinelZero then early out.
10125+ if (ExpectedIdx < 0)
10126+ return false;
1012210127 if (MaskIdx == SM_SentinelZero) {
1012310128 // If we need this expected index to be a zero element, then update the
1012410129 // relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39599,46 @@ static bool matchBinaryPermuteShuffle(
3959439599 ((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
3959539600 (MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
3959639601 (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
39602+ MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
39603+ MaskVT.getSizeInBits() / EltSizeInBits);
3959739604 if (!isAnyZero(Mask)) {
3959839605 int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
3959939606 if (0 < Rotation) {
3960039607 Shuffle = X86ISD::VALIGN;
39601- if (EltSizeInBits == 64)
39602- ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
39603- else
39604- ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
39608+ ShuffleVT = AlignVT;
3960539609 PermuteImm = Rotation;
3960639610 return true;
3960739611 }
3960839612 }
39613+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
39614+ unsigned ZeroLo = Zeroable.countr_one();
39615+ unsigned ZeroHi = Zeroable.countl_one();
39616+ assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
39617+ if (ZeroLo) {
39618+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39619+ std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
39620+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39621+ V1 = V1;
39622+ V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39623+ Shuffle = X86ISD::VALIGN;
39624+ ShuffleVT = AlignVT;
39625+ PermuteImm = NumMaskElts - ZeroLo;
39626+ return true;
39627+ }
39628+ }
39629+ if (ZeroHi) {
39630+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39631+ std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi,
39632+ ZeroHi);
39633+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39634+ V2 = V1;
39635+ V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39636+ Shuffle = X86ISD::VALIGN;
39637+ ShuffleVT = AlignVT;
39638+ PermuteImm = ZeroHi;
39639+ return true;
39640+ }
39641+ }
3960939642 }
3961039643
3961139644 // Attempt to match against PALIGNR byte rotate.
0 commit comments