@@ -3941,6 +3941,24 @@ static bool canScaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts) {
39413941  return scaleShuffleElements(Mask, NumDstElts, ScaledMask);
39423942}
39433943
3944+ // Helper to grow the shuffle mask for a larger value type.
3945+ // NOTE: This is different to scaleShuffleElements which is a same size type.
3946+ static void growShuffleMask(ArrayRef<int> SrcMask,
3947+                             SmallVectorImpl<int> &DstMask,
3948+                             unsigned SrcSizeInBits, unsigned DstSizeInBits) {
3949+   assert(DstMask.empty() && "Expected an empty shuffle mas");
3950+   assert((DstSizeInBits % SrcSizeInBits) == 0 && "Illegal shuffle scale");
3951+   unsigned Scale = DstSizeInBits / SrcSizeInBits;
3952+   unsigned NumSrcElts = SrcMask.size();
3953+   DstMask.assign(SrcMask.begin(), SrcMask.end());
3954+   for (int &M : DstMask) {
3955+     if (M < 0)
3956+       continue;
3957+     M = (M % NumSrcElts) + ((M / NumSrcElts) * Scale * NumSrcElts);
3958+   }
3959+   DstMask.append((Scale - 1) * NumSrcElts, SM_SentinelUndef);
3960+ }
3961+ 
39443962/// Returns true if Elt is a constant zero or a floating point constant +0.0.
39453963bool X86::isZeroNode(SDValue Elt) {
39463964  return isNullConstant(Elt) || isNullFPConstant(Elt);
@@ -40456,19 +40474,13 @@ static SDValue combineX86ShuffleChainWithExtract(
4045640474  }
4045740475
4045840476  // Bail if we fail to find a source larger than the existing root.
40459-   unsigned Scale = WideSizeInBits / RootSizeInBits;
4046040477  if (WideSizeInBits <= RootSizeInBits ||
4046140478      (WideSizeInBits % RootSizeInBits) != 0)
4046240479    return SDValue();
4046340480
4046440481  // Create new mask for larger type.
40465-   SmallVector<int, 64> WideMask(BaseMask);
40466-   for (int &M : WideMask) {
40467-     if (M < 0)
40468-       continue;
40469-     M = (M % NumMaskElts) + ((M / NumMaskElts) * Scale * NumMaskElts);
40470-   }
40471-   WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
40482+   SmallVector<int, 64> WideMask;
40483+   growShuffleMask(BaseMask, WideMask, RootSizeInBits, WideSizeInBits);
4047240484
4047340485  // Attempt to peek through inputs and adjust mask when we extract from an
4047440486  // upper subvector.
0 commit comments