@@ -321,13 +321,49 @@ static Optional<unsigned> getInsertIndex(const Value *InsertInst,
321321 return Index;
322322}
323323
324+ namespace {
325+ /// Selects which elements of a shuffle mask buildUseMask() should treat as
326+ /// uses when preparing the bitset consumed by the undef/poison analysis.
327+ enum class UseMask {
328+ FirstArg, ///< The mask is expected to permute 1-2 vectors; only the mask
329+ ///< elements that refer to the first argument are considered
330+ ///< (mask indices in range [0:VF)).
331+ SecondArg, ///< The mask is expected to permute 2 vectors; only the mask
332+ ///< elements that refer to the second argument are considered
333+ ///< (mask indices in range [VF:2*VF)).
334+ UndefsAsMask ///< Treat undef mask elements (-1) as placeholders for future
335+ ///< shuffle elements and mark their positions as used.
336+ ///< Non-undef elements are left unmarked, since the mask
337+ ///< already accounts for them.
338+ };
339+ } // namespace
340+
341+ /// Builds a \p VF-bit use bitset for \p Mask: all bits start set, and the bits
342+ /// selected by \p MaskArg (first/second argument lanes or undef slots) are reset.
343+ static SmallBitVector buildUseMask(int VF, ArrayRef<int> Mask,
344+ UseMask MaskArg) {
345+ SmallBitVector UseMask(VF, true); // Local shadows the enum name; `UseMask::` below still names the enum.
346+ for (auto P : enumerate(Mask)) {
347+ if (P.value() == UndefMaskElem) {
348+ if (MaskArg == UseMask::UndefsAsMask)
349+ UseMask.reset(P.index()); // Undef slot: reset by its position in the mask.
350+ continue; // Undef elements never fall through to the argument checks.
351+ }
352+ if (MaskArg == UseMask::FirstArg && P.value() < VF)
353+ UseMask.reset(P.value()); // Reset the referenced lane of the first argument.
354+ else if (MaskArg == UseMask::SecondArg && P.value() >= VF)
355+ UseMask.reset(P.value() - VF); // Rebase the index to a lane of the second argument.
356+ }
357+ return UseMask;
358+ }
359+
324360/// Checks if the given value is actually an undefined constant vector.
325- /// Also, if the\p ShuffleMask is not empty, tries to check if the non-masked
361+ /// Also, if the \p UseMask is not empty, tries to check if the non-masked
326362/// elements actually mask the insertelement buildvector, if any.
327363template <bool IsPoisonOnly = false>
328364static SmallBitVector isUndefVector(const Value *V,
329- ArrayRef<int> ShuffleMask = std::nullopt ) {
330- SmallBitVector Res(ShuffleMask .empty() ? 1 : ShuffleMask .size(), true);
365+ const SmallBitVector &UseMask = {} ) {
366+ SmallBitVector Res(UseMask .empty() ? 1 : UseMask .size(), true);
331367 using T = std::conditional_t<IsPoisonOnly, PoisonValue, UndefValue>;
332368 if (isa<T>(V))
333369 return Res;
@@ -336,7 +372,7 @@ static SmallBitVector isUndefVector(const Value *V,
336372 return Res.reset();
337373 auto *C = dyn_cast<Constant>(V);
338374 if (!C) {
339- if (!ShuffleMask .empty()) {
375+ if (!UseMask .empty()) {
340376 const Value *Base = V;
341377 while (auto *II = dyn_cast<InsertElementInst>(Base)) {
342378 if (isa<T>(II->getOperand(1)))
@@ -345,14 +381,14 @@ static SmallBitVector isUndefVector(const Value *V,
345381 Optional<unsigned> Idx = getInsertIndex(II);
346382 if (!Idx)
347383 continue;
348- if (*Idx < ShuffleMask .size() && ShuffleMask[ *Idx] == UndefMaskElem )
384+ if (*Idx < UseMask .size() && !UseMask.test( *Idx) )
349385 Res.reset(*Idx);
350386 }
351387 // TODO: Add analysis for shuffles here too.
352388 if (V == Base) {
353389 Res.reset();
354390 } else {
355- SmallVector<int> SubMask(ShuffleMask .size(), UndefMaskElem );
391+ SmallBitVector SubMask(UseMask .size(), false );
356392 Res &= isUndefVector<IsPoisonOnly>(Base, SubMask);
357393 }
358394 } else {
@@ -363,8 +399,7 @@ static SmallBitVector isUndefVector(const Value *V,
363399 for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
364400 if (Constant *Elem = C->getAggregateElement(I))
365401 if (!isa<T>(Elem) &&
366- (ShuffleMask.empty() ||
367- (I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem)))
402+ (UseMask.empty() || (I < UseMask.size() && !UseMask.test(I))))
368403 Res.reset(I);
369404 }
370405 return Res;
@@ -6720,7 +6755,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
67206755 // TODO: Implement the analysis of the FirstInsert->getOperand(0)
67216756 // subvector of ActualVecTy.
67226757 SmallBitVector InMask =
6723- isUndefVector(FirstInsert->getOperand(0), InsertMask);
6758+ isUndefVector(FirstInsert->getOperand(0),
6759+ buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask));
67246760 if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
67256761 if (InsertVecSz != VecSz) {
67266762 auto *ActualVecTy =
@@ -7350,12 +7386,14 @@ static T *performExtractsShuffleAction(
73507386 SmallVector<int> Mask(ShuffleMask.begin()->second);
73517387 auto VMIt = std::next(ShuffleMask.begin());
73527388 T *Prev = nullptr;
7353- SmallBitVector IsBaseUndef = isUndefVector(Base, Mask);
7389+ SmallBitVector UseMask =
7390+ buildUseMask(Mask.size(), Mask, UseMask::UndefsAsMask);
7391+ SmallBitVector IsBaseUndef = isUndefVector(Base, UseMask);
73547392 if (!IsBaseUndef.all()) {
73557393 // Base is not undef, need to combine it with the next subvectors.
73567394 std::pair<T *, bool> Res =
73577395 ResizeAction(ShuffleMask.begin()->first, Mask, /*ForSingleMask=*/false);
7358- SmallBitVector IsBasePoison = isUndefVector<true>(Base, Mask );
7396+ SmallBitVector IsBasePoison = isUndefVector<true>(Base, UseMask );
73597397 for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
73607398 if (Mask[Idx] == UndefMaskElem)
73617399 Mask[Idx] = IsBasePoison.test(Idx) ? UndefMaskElem : Idx;
@@ -8512,19 +8550,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
85128550 if (Mask[I] != UndefMaskElem)
85138551 InsertMask[Offset + I] = I;
85148552 }
8553+ SmallBitVector UseMask =
8554+ buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask);
85158555 SmallBitVector IsFirstUndef =
8516- isUndefVector(FirstInsert->getOperand(0), InsertMask );
8556+ isUndefVector(FirstInsert->getOperand(0), UseMask );
85178557 if ((!IsIdentity || Offset != 0 || !IsFirstUndef.all()) &&
85188558 NumElts != NumScalars) {
85198559 if (IsFirstUndef.all()) {
85208560 if (!ShuffleVectorInst::isIdentityMask(InsertMask)) {
8521- SmallBitVector IsFirstPoison =
8522- isUndefVector<true>(FirstInsert->getOperand(0), InsertMask );
8523- if (!IsFirstPoison.all()) {
8524- for (unsigned I = 0; I < NumElts; I++) {
8525- if (InsertMask[I] == UndefMaskElem && !IsFirstPoison.test(I))
8526- InsertMask[I] = I + NumElts;
8527- }
8561+ SmallBitVector IsFirstPoison =
8562+ isUndefVector<true>(FirstInsert->getOperand(0), UseMask );
8563+ if (!IsFirstPoison.all()) {
8564+ for (unsigned I = 0; I < NumElts; I++) {
8565+ if (InsertMask[I] == UndefMaskElem && !IsFirstPoison.test(I))
8566+ InsertMask[I] = I + NumElts;
8567+ }
85288568 }
85298569 V = Builder.CreateShuffleVector(
85308570 V,
@@ -8538,7 +8578,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
85388578 }
85398579 } else {
85408580 SmallBitVector IsFirstPoison =
8541- isUndefVector<true>(FirstInsert->getOperand(0), InsertMask );
8581+ isUndefVector<true>(FirstInsert->getOperand(0), UseMask );
85428582 for (unsigned I = 0; I < NumElts; I++) {
85438583 if (InsertMask[I] == UndefMaskElem)
85448584 InsertMask[I] = IsFirstPoison.test(I) ? UndefMaskElem : I;
@@ -9268,8 +9308,18 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
92689308 if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
92699309 SV->isZeroEltSplat())
92709310 break;
9271- bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask).all();
9272- bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask).all();
9311+ int LocalVF = Mask.size();
9312+ if (auto *SVOpTy =
9313+ dyn_cast<FixedVectorType>(SV->getOperand(0)->getType()))
9314+ LocalVF = SVOpTy->getNumElements();
9315+ bool IsOp1Undef =
9316+ isUndefVector(SV->getOperand(0),
9317+ buildUseMask(LocalVF, Mask, UseMask::FirstArg))
9318+ .all();
9319+ bool IsOp2Undef =
9320+ isUndefVector(SV->getOperand(1),
9321+ buildUseMask(LocalVF, Mask, UseMask::SecondArg))
9322+ .all();
92739323 if (!IsOp1Undef && !IsOp2Undef)
92749324 break;
92759325 SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
@@ -9289,7 +9339,11 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
92899339 &CombineMasks](Value *V1, Value *V2,
92909340 ArrayRef<int> Mask) -> Value * {
92919341 assert(V1 && "Expected at least one vector value.");
9292- if (V2 && !isUndefVector(V2, Mask).all()) {
9342+ int VF = Mask.size();
9343+ if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
9344+ VF = FTy->getNumElements();
9345+ if (V2 &&
9346+ !isUndefVector(V2, buildUseMask(VF, Mask, UseMask::SecondArg)).all()) {
92939347 // Peek through shuffles.
92949348 Value *Op1 = V1;
92959349 Value *Op2 = V2;
@@ -9313,12 +9367,20 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
93139367 // Check if we have 2 resizing shuffles - need to peek through operands
93149368 // again.
93159369 if (auto *SV1 = dyn_cast<ShuffleVectorInst>(Op1))
9316- if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2))
9370+ if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2)) {
9371+ SmallBitVector UseMask1 = buildUseMask(
9372+ cast<FixedVectorType>(SV1->getOperand(1)->getType())
9373+ ->getNumElements(),
9374+ CombinedMask1, UseMask::FirstArg);
9375+ SmallBitVector UseMask2 = buildUseMask(
9376+ cast<FixedVectorType>(SV2->getOperand(1)->getType())
9377+ ->getNumElements(),
9378+ CombinedMask2, UseMask::FirstArg);
93179379 if (SV1->getOperand(0)->getType() ==
93189380 SV2->getOperand(0)->getType() &&
93199381 SV1->getOperand(0)->getType() != SV1->getType() &&
9320- isUndefVector(SV1->getOperand(1), CombinedMask1 ).all() &&
9321- isUndefVector(SV2->getOperand(1), CombinedMask2 ).all()) {
9382+ isUndefVector(SV1->getOperand(1), UseMask1 ).all() &&
9383+ isUndefVector(SV2->getOperand(1), UseMask2 ).all()) {
93229384 Op1 = SV1->getOperand(0);
93239385 Op2 = SV2->getOperand(0);
93249386 SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
@@ -9330,6 +9392,7 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
93309392 CombineMasks(ShuffleMask2, CombinedMask2);
93319393 CombinedMask2.swap(ShuffleMask2);
93329394 }
9395+ }
93339396 } while (PrevOp1 != Op1 || PrevOp2 != Op2);
93349397 VF = cast<VectorType>(Op1->getType())
93359398 ->getElementCount()
0 commit comments